### VISUALIZING DATA###
##REQUIRED DATA FILE: PESenergy.csv. Available from: http://dx.doi.org/10.7910/DVN/ARKOTI
#clean up
#NOTE(review): rm(list=ls()) wipes the global environment; acceptable for a
#teaching script run top-to-bottom, but avoid it in reusable code.
rm(list=ls())
##SECTION 3.1: UNIVARIATE GRAPHS IN THE base PACKAGE##
#load energy policy coverage data (assumes PESenergy.csv is in the working directory)
pres.energy<-read.csv("PESenergy.csv")
#pres.energy<-read.csv("http://j.mp/PRESenergy")
#draw a histogram of TV coverage (main="" suppresses the default title)
hist(pres.energy$Energy,xlab="Television Stories",main="")
#add a baseline at zero and redraw the plot frame
abline(h=0,col='gray60')
box()
#box-and-whisker plot of TV coverage
boxplot(pres.energy$Energy,ylab="Television Stories")
#box-and-whisker plots before and after Nixon speech
#indicator: first 58 months (pre Nov. 1973) = 0, remaining 122 months = 1
pres.energy$post.nixon<-c(rep(0,58),rep(1,122))
#suppress default axes so we can supply custom period labels
#(axes=FALSE rather than axes=F: T/F are reassignable aliases, TRUE/FALSE are not)
boxplot(pres.energy$Energy~pres.energy$post.nixon,
axes=FALSE,ylab="Television Stories")
axis(1,at=c(1,2),labels=c('Before Nov. 1973','After Nov. 1973'))
axis(2)
box()
#SCATTERPLOT#
#quick and dirty
plot(y=pres.energy$Energy,x=pres.energy$oilc)
#beautified: same scatter with labeled axes
plot(y=pres.energy$Energy,x=pres.energy$oilc,
xlab="Oil Price",ylab="Energy Coverage")
#overlay the bivariate OLS fit of coverage on oil price
abline(lm(Energy~oilc,data=pres.energy))
#SECTION 3.2.1: LINE GRAPHS WITH plot#
#line plot of energy coverage by month
#axes=FALSE (not the reassignable alias F) so custom date labels can be drawn
plot(x=pres.energy$Energy,type="l",axes=FALSE,
xlab='Month', ylab='Television Stories on Energy')
#tick marks every 36 months, labeled with January of the corresponding year
axis(1,at=c(1,37,73,109,145),
labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'),
cex.axis=.7)
axis(2)
abline(h=0,col='gray60')
box()
#alternative version of the line plot of monthly energy coverage
#(explicit time index on the x-axis instead of observation order)
pres.energy$Time<-1:180
plot(y=pres.energy$Energy,x=pres.energy$Time,type="l")
#line plot of oil price per barrel by month
plot(x=pres.energy$oilc,type='l',axes=FALSE,xlab='Month',ylab='Cost of Oil')
axis(1,at=c(1,37,73,109,145),
labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'),
cex.axis=.7)
axis(2)
box()
######################################################################
# simulate trending series
#NOTE(review): no set.seed() here, so the "## " output below reflects one
#particular run; re-running gives slightly different numbers.
#set the sample size
n <- 300
#create a time index
t <- c(1:300)
#generate a disturbance term for x
delta <- rnorm(n)
#generate the variable x
x <- .1*t + delta
#generate a disturbance term for y
epsilon <- rnorm(n)
#generate the variable y
#y does not depend on x at all -- both merely trend on t
y <- .5*t + epsilon
#regress y on x
#the enormous t-statistic on x below illustrates spurious regression
#between two independent trending series
model.1 <- lm(y~x)
#view the results
summary(model.1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.8203 -3.2465 0.2446 3.5211 15.9706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.47275 0.57678 4.287 2.45e-05 ***
## x 4.84700 0.03305 146.675 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.078 on 298 degrees of freedom
## Multiple R-squared: 0.9863, Adjusted R-squared: 0.9863
## F-statistic: 2.151e+04 on 1 and 298 DF, p-value: < 2.2e-16
#### quick and dirty way to get some confidence intervals
#install.packages("MBESS") #installation, only necessary once per machine
library(MBESS)
#the following command expects:
#coefficient estimate, standard error, sample size, number of predictors
#reconstruct the slope's standard error from the unscaled covariance matrix
my.se<-summary(model.1)$sigma*sqrt(summary(model.1)$cov.unscaled[2,2])
my.se
## [1] 0.03304578
MBESS::ci.reg.coef(b.j=model.1$coefficients[2], SE.b.j=my.se, N=300, p=1)
## [1] "95 percent CI limits (with corresponding probability) around the jth population regression coefficient calculated using the (central) t-distribution with 298 degrees of freedom follow."
## $Lower.Limit.for.beta.j
## x
## 4.781967
##
## $Prob.Less.Lower
## [1] 0.025
##
## $Upper.Limit.for.beta.j
## x
## 4.912032
##
## $Prob.Greater.Upper
## [1] 0.025
######################################################################
#differencing series
#NOTE(review): again unseeded, so printed output is from one particular run.
#set the sample size
n <- 300
#create a time index
t <- c(1:300)
#generate a disturbance term for x
delta <- rnorm(n)
#generate the variable x
x <- .1*t + delta
#generate a disturbance term for y
epsilon <- rnorm(n)
#generate the variable y
#here the true effect of x on y is 0.5, but both series also trend on t
y <- .5*t + .5*x + epsilon
#regress y on x
#the levels regression badly overstates the effect (~5.4 vs. true 0.5)
#because the shared trend is absorbed into the x coefficient
model.1 <- lm(y~x)
summary(model.1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2596 -3.4637 0.1056 3.0085 13.5817
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.51811 0.59892 2.535 0.0118 *
## x 5.39431 0.03433 157.143 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.231 on 298 degrees of freedom
## Multiple R-squared: 0.9881, Adjusted R-squared: 0.988
## F-statistic: 2.469e+04 on 1 and 298 DF, p-value: < 2.2e-16
#Install a package
#install.packages("timeSeries")
library(timeSeries)
## Loading required package: timeDate
#difference the series (first differences remove the linear trend)
d.x <- diff(x)
d.y <- diff(y)
#differencing drops one observation, hence 299 time points
d.t <- c(1:299)
#plot the series
plot(x=d.t,y=d.x,type='l')
lines(x=d.t,y=d.y,col='blue',lty=2)
#regress y on x
#on differenced data the estimate (~0.46) is close to the true 0.5
model.2 <- lm(d.y~d.x)
summary(model.2)
##
## Call:
## lm(formula = d.y ~ d.x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0828 -1.0053 -0.0361 1.0490 3.7437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.50267 0.08715 5.768 2.01e-08 ***
## d.x 0.46140 0.05837 7.905 5.30e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.503 on 297 degrees of freedom
## Multiple R-squared: 0.1738, Adjusted R-squared: 0.171
## F-statistic: 62.49 on 1 and 297 DF, p-value: 5.295e-14
######################################################################
#embed a static table image; only meaningful when this script is rendered
#through knitr/R Markdown
knitr::include_graphics("Table6-1.pdf")
#ARIMA  <- stray bare symbol commented out: evaluating it would stop the
#script with "object 'ARIMA' not found" when sourced
###CHAPTER 9: TIME SERIES ANALYSIS###
###POLITICAL ANALYSIS USING R, BY JAMIE MONOGAN###
##REQUIRED DATA FILE: PESenergy.csv
#clean up
rm(list=ls())
##SECTION 9.1: THE BOX-JENKINS METHOD##
#load data
#pres.energy<-read.csv("http://j.mp/PRESenergy")
pres.energy<-read.csv("PESenergy.csv")
#autocorrelation and partial autocorrelation functions
#(used to identify the ARIMA order: slow ACF decay + single PACF spike
#is consistent with an AR(1) process)
acf(pres.energy$Energy,lag.max=24)
pacf(pres.energy$Energy,lag.max=24)
#estimate ARIMA model: AR(1), no differencing, no MA term
ar1.mod<-arima(pres.energy$Energy,order=c(1,0,0))
ar1.mod
##
## Call:
## arima(x = pres.energy$Energy, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.8235 32.9020
## s.e. 0.0416 9.2403
##
## sigma^2 estimated as 502.7: log likelihood = -815.77, aic = 1637.55
#diagnose model (standardized residuals, residual ACF, Ljung-Box p-values)
tsdiag(ar1.mod,24)
#autocorrelation and partial autocorrelation functions on residuals
acf(ar1.mod$residuals,lag.max=24)
pacf(ar1.mod$residuals,lag.max=24)
#Ljung-Box Q test: high p-value below means no evidence of remaining
#serial correlation in the residuals
Box.test(ar1.mod$residuals,lag=24,type='Ljung-Box')
##
## Box-Ljung test
##
## data: ar1.mod$residuals
## X-squared = 20.112, df = 24, p-value = 0.6904
######################################################################
#SECTION 9.1.1: TRANSFER FUNCTIONS VERSUS STATIC MODELS#
#Energy coverage examples showing static regression and intervention analysis
#static regression model with ARIMA error process
#build the regressor matrix: presidential speech dummies, events, and
#economic/political covariates
predictors<-as.matrix(subset(pres.energy,
select=c(rmn1173,grf0175,grf575,jec477,
jec1177,jec479,embargo,hostages,
oilc,Approval,Unemploy)))
#stats:: prefix matters: the TSA package (loaded below) masks stats::arima
static.mod<-stats::arima(pres.energy$Energy,
order=c(1,0,0),
xreg=predictors)
static.mod
##
## Call:
## stats::arima(x = pres.energy$Energy, order = c(1, 0, 0), xreg = predictors)
##
## Coefficients:
## ar1 intercept rmn1173 grf0175 grf575 jec477 jec1177
## 0.8222 5.8822 91.3265 31.8761 -8.2280 29.6446 -6.6967
## s.e. 0.0481 52.9008 15.0884 15.4643 15.2025 15.0831 15.0844
## jec479 embargo hostages oilc Approval Unemploy
## -20.1624 35.3247 -16.5001 0.8855 -0.2479 1.0080
## s.e. 15.2238 15.1200 13.7619 1.0192 0.2816 3.8909
##
## sigma^2 estimated as 379.3: log likelihood = -790.42, aic = 1608.84
#load package
#install.packages('TSA')
library(TSA)
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:timeDate':
##
## kurtosis, skewness
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
#estimate transfer function: the Nixon speech dummy (column 1) enters as a
#dynamic intervention with transfer=list(c(1,0)) -- one denominator (decay)
#parameter and a concurrent effect; remaining columns stay static in xreg
dynamic.mod<-TSA::arimax(pres.energy$Energy,order=c(1,0,0),
xreg=predictors[,-1],
xtransf=predictors[,1],
transfer=list(c(1,0)))
dynamic.mod
##
## Call:
## TSA::arimax(x = pres.energy$Energy, order = c(1, 0, 0), xreg = predictors[,
## -1], xtransf = predictors[, 1], transfer = list(c(1, 0)))
##
## Coefficients:
## ar1 intercept grf0175 grf575 jec477 jec1177 jec479
## 0.8262 20.2787 31.5282 -7.9725 29.9820 -6.3304 -19.8179
## s.e. 0.0476 46.6870 13.8530 13.6104 13.5013 13.5011 13.6345
## embargo hostages oilc Approval Unemploy T1-AR1 T1-MA0
## 25.9388 -16.9015 0.5927 -0.2074 0.1660 0.6087 160.6241
## s.e. 13.2305 12.4422 0.9205 0.2495 3.5472 0.0230 17.0388
##
## sigma^2 estimated as 305.1: log likelihood = -770.83, aic = 1569.66
#plot the dynamic effect of the intervention over the raw series
months<-c(1:180)
#160.6241 and .6087 are the fitted T1-MA0 (initial jump) and T1-AR1 (decay)
#estimates from dynamic.mod; the other covariates are held at their values
#in month 58 (Nov. 1973). drop() collapses the 1x1 matrix product to a
#scalar so that adding the length-180 vectors below no longer triggers R's
#deprecated array-vector recycling warning.
y.pred<-drop(dynamic.mod$coef[2:12]%*%c(1,predictors[58,-1]))+
160.6241*predictors[,1]+160.6241*(.6087^(months-59))*as.numeric(months>59)
plot(y=pres.energy$Energy,x=months,
xlab="Month",ylab="Energy Policy Stories",
type="l",axes=FALSE)
axis(1,at=c(1,37,73,109,145),
labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'))
axis(2)
box()
lines(y=y.pred,x=months,lty=2,col='blue',lwd=2)
#plot predicted values of the series with a line, with true values as points
months<-c(1:180)
#in-sample fitted values: observed series minus model residuals
full.pred<-pres.energy$Energy-dynamic.mod$residuals
plot(y=full.pred,x=months,
xlab="Month",ylab="Energy Policy Stories",
type="l",ylim=c(0,225),axes=FALSE)
points(y=pres.energy$Energy,x=months,pch=20)
legend(x=0,y=200,legend=c("Predicted","True"),pch=c(NA,20),lty=c(1,NA))
axis(1,at=c(1,37,73,109,145),
labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'))
axis(2)
box()
######################################################################
#### ESTIMATING A SEASONAL ARIMA MODEL ###
#front matter
rm(list=ls())
library(TSA)
#load data (monthly CO2 series shipped with TSA)
data(co2)
#write.csv(co2,"co2.csv")
plot(co2)
#first difference model: removes the trend but leaves seasonal
#autocorrelation visible in the ACF/PACF below
co2.1 <- arima(co2, order=c(0,1,0))
plot(co2.1$residuals)
acf(co2.1$residuals,24)
pacf(co2.1$residuals,24)
#seasonal difference model: add a seasonal difference at lag 12
co2.2 <- TSA::arima(co2, order=c(0,1,0),
seasonal=list(order=c(0,1,0), period=12))
plot(co2.2$residuals)
acf(co2.2$residuals,24)
pacf(co2.2$residuals,24)
#Moving Average Components for year and season: ARIMA(0,1,1)x(0,1,1)_12,
#the classic "airline" specification
co2.3 <- TSA::arima(co2, order=c(0,1,1),
seasonal=list(order=c(0,1,1), period=12))
acf(co2.3$residuals,24)
pacf(co2.3$residuals,24)
#Ljung-Box on the final model's residuals -- p = .36 indicates white noise
Box.test(co2.3$residuals,24,"Ljung-Box")
##
## Box-Ljung test
##
## data: co2.3$residuals
## X-squared = 25.891, df = 24, p-value = 0.3587
co2.3
##
## Call:
## TSA::arima(x = co2, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1),
## period = 12))
##
## Coefficients:
## ma1 sma1
## -0.5792 -0.8206
## s.e. 0.0791 0.1137
##
## sigma^2 estimated as 0.5446: log likelihood = -139.54, aic = 283.08
######################################################################
### FORECASTING WITH AN ARIMA MODEL ###
#front matter
rm(list=ls())
library(foreign)
library(TSA)
#Input Data (Stata file read via foreign::read.dta)
#data <- read.dta(file.choose())
data <- read.dta("HOME2X7.DTA")
#data<-read.dta("//spia.uga.edu/faculty_pages/monogan/teaching/ts/HOME2X7.DTA")
#Bind Two or More Time Series
#NOTE(review): ts.union() on a single data frame presumably coerces its
#columns into a joint time-series object -- confirm the z2 column survives
#as data$z2 with your version of the data.
data <- ts.union(data)
#Diagnose series 2
#Auto- and Cross- Covariance and -Correlation Function Estimation
acf(data$z2,20)
pacf(data$z2,20)
mod.z2 <- TSA::arima(data$z2, order=c(1,0,0))
acf(mod.z2$residuals,20)
pacf(mod.z2$residuals,20)
#Box-Pierce and Ljung-Box Tests
Box.test(mod.z2$residuals,20,"Ljung-Box")
##
## Box-Ljung test
##
## data: mod.z2$residuals
## X-squared = 14.199, df = 20, p-value = 0.8202
#Projections with mod.z2: TSA's plot method for Arima objects draws the
#series plus n.ahead out-of-sample forecasts
plot(mod.z2, n.ahead=50, type='l')
######################################################################
#Bush approval example in R
#load data & view series
#bush <- read.dta(file.choose())#BUSHJOB.DTA
bush <- read.dta("BUSHJOB.DTA")
#bush <- read.dta("//spia.uga.edu/faculty_pages/monogan/teaching/ts/BUSHJOB.DTA")
plot(y=bush$approve, x=bush$t, type='l')
#identify arima process
acf(bush$approve,20)
pacf(bush$approve,20)
#Estimate AR(1) model. Using a bit of theory to justify AR(1).
mod.1 <- arima(bush$approve, order=c(1,0,0))
mod.1
##
## Call:
## arima(x = bush$approve, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.9145 56.3717
## s.e. 0.0529 6.4375
##
## sigma^2 estimated as 20.01: log likelihood = -146.76, aic = 297.52
#diagnose arima model
acf(mod.1$residuals,20)
pacf(mod.1$residuals,20)
Box.test(mod.1$residuals,20,"Ljung-Box")
##
## Box-Ljung test
##
## data: mod.1$residuals
## X-squared = 13.303, df = 20, p-value = 0.864
#estimate intervention analysis for september 11 (remember to start with a pulse)
mod.2b <- arimax(bush$approve, order=c(1,0,0),
xtransf=bush$s11, transfer=list(c(1,0)))
mod.2b
##
## Call:
## arimax(x = bush$approve, order = c(1, 0, 0), xtransf = bush$s11, transfer = list(c(1,
## 0)))
##
## Coefficients:
## ar1 intercept T1-AR1 T1-MA0
## 0.8562 56.0327 0.8984 27.6660
## s.e. 0.2028 5.7056 0.0197 4.6373
##
## sigma^2 estimated as 8.997: log likelihood = -126.53, aic = 261.06
#Notes: the second parameter is the numerator term.
#If 0 only, then concurrent effect only.
#The first parameter affects the denominator.
#c(0,0) replicates the effect of just doing "xreg"
#Our parameter estimates look good,
#no need to drop delta or switch to a step function.
#Graph the intervention model
#56.0327 / 27.6660 / 0.8984 are the fitted intercept, T1-MA0, and T1-AR1
#from mod.2b above; the pulse decays geometrically after observation 9
#(assumed to be the Sept. 2001 poll -- confirm against the data's t index)
y.pred <- 56.0327 + 27.6660*bush$s11 + 27.6660*(0.8984^(bush$t-9))*as.numeric(bush$t>9)
plot(y=bush$approve, x=bush$t, type='l')
lines(y=y.pred, x=bush$t, lty=2)
#We also can combine the AR(1) and intervention features into forecasts.
#Do this SECOND, though:
#in-sample fitted values = observed minus residuals
full.pred<-bush$approve-mod.2b$residuals
plot(y=bush$approve, x=bush$t)
lines(y=full.pred, x=bush$t)
#expand into the onset of the war in Iraq
#note: the upward movement actually starts to happen one lag out
#simplest specification: two pulse interventions, each with its own
#concurrent effect and decay rate
mod.3 <- arimax(bush$approve, order=c(1,0,0),
xtransf=cbind(bush$s11,bush$iraq),
transfer=list(c(1,0),c(1,0)))
mod.3
##
## Call:
## arimax(x = bush$approve, order = c(1, 0, 0), xtransf = cbind(bush$s11, bush$iraq),
## transfer = list(c(1, 0), c(1, 0)))
##
## Coefficients:
## ar1 intercept T1-AR1 T1-MA0 T2-AR1 T2-MA0
## 0.8526 56.1190 0.9025 26.0329 0.7592 3.9335
## s.e. 0.2017 6.1802 0.0193 4.6164 0.2114 3.0440
##
## sigma^2 estimated as 8.509: log likelihood = -125.12, aic = 262.25
#Graph the new intervention model
#(hard-coded values are again the fitted coefficients from mod.3;
#the Iraq pulse is assumed to hit at observation 27)
y.pred <- 56.1190+ 26.0329*bush$s11 +
26.0329*(0.9025^(bush$t-9))*as.numeric(bush$t>9) +
3.9335*bush$iraq + 3.9335*(0.7592^(bush$t-27))*as.numeric(bush$t>27)
plot(y=bush$approve, x=bush$t, type='l')
lines(y=y.pred, x=bush$t, lty=2)
######################################################################
#exploring the 'ccf' function. Also, how to lag variables in R.
#clean up
rm(list=ls())
#The 'ccf' function allows a simple way to get cross-correlations.
#Oddly, 'x' refers to the presumed endogenous variable
#and 'y' refers to the presumed exogenous variable.
#This is opposite of what is usually expected.
#Here is a Monte Carlo simulation describing how this works.
#(unseeded, so exact plots differ per run)
a<-rnorm(1000)
b<-rep(NA,1000)
b[1]<-0
#b is driven by the first lag of a, so lags of a should predict b
for (i in 2:1000) b[i]<-.5*a[i-1]+rnorm(1)
#view our simulations
plot(a,type='l')
lines(b,lty=2,col='red')
#By the truth, lags of a should predict values of b.
#To get what we normally want from a CCF,
#x is your endogenous variable and y is your exogenous
ccf(x=a,y=b,lag.max=5,
xlab="Negative means x precedes y. Positive means y precedes x.")
ccf(x=b,y=a,lag.max=5,
xlab="Negative means x precedes y. Positive means y precedes x.")
###ADDITIONAL ILLUSTRATION###
#Let's look at some cross-correlations of Greek tourism and terrorist attacks.
#We'll compare 'ccf' to Pearson correlation computation.
#Load data
#https://spia.uga.edu/faculty_pages/monogan/teaching/ts/italy.csv
data <- read.csv('italy.csv')
#View data
plot(data$GRSHARE, type='l')
plot(data$ATTKGR, type="l")
#CCF of unfiltered variables
ccf.output<-ccf(y=data$ATTKGR, x=data$GRSHARE, 12,
xlab="Negative means x precedes y. Positive means y precedes x."); ccf.output
##
## Autocorrelations of series 'X', by lag
##
## -12 -11 -10 -9 -8 -7 -6 -5 -4 -3
## -0.046 0.136 0.064 -0.201 -0.095 0.133 0.159 -0.133 -0.099 0.229
## -2 -1 0 1 2 3 4 5 6 7
## 0.227 -0.114 -0.085 0.184 0.129 -0.165 -0.106 0.162 0.100 -0.179
## 8 9 10 11 12
## -0.214 0.083 0.022 -0.181 -0.245
#replicate selected CCF values by hand: build lagged series with stats::lag
#(negative k shifts the series back, i.e. creates a lag)
tour<-ts(data$GRSHARE)
terr<-ts(data$ATTKGR)
l.terr<-lag(terr,-1)
l2.terr<-lag(terr,-2)
l.tour<-lag(tour,-1)
l2.tour<-lag(tour,-2)
data.2<-na.omit(as.data.frame(ts.union(tour,terr,l.terr,l2.terr,l.tour,l2.tour)))
#compare
#NOTE(review): subscripting an acf object selects by LAG VALUE via the
#"[.acf" method, so ccf.output[-1] is the correlation AT lag -1 -- it does
#NOT drop the first element as ordinary negative indexing would.
cor(data.2$tour,data.2$l.terr);ccf.output[1]
## [1] 0.1819211
##
## Autocorrelations of series 'X', by lag
##
## 1
## 0.184
cor(data.2$tour,data.2$l2.terr);ccf.output[2]
## [1] 0.1324894
##
## Autocorrelations of series 'X', by lag
##
## 2
## 0.129
cor(data.2$terr,data.2$l.tour);ccf.output[-1]
## [1] -0.1412183
##
## Autocorrelations of series 'X', by lag
##
## -1
## -0.114
cor(data.2$terr,data.2$l2.tour);ccf.output[-2]
## [1] 0.2313133
##
## Autocorrelations of series 'X', by lag
##
## -2
## 0.227
###################################################################
##TRANSFER FUNCTION EXAMPLE--right track example##
#Is the country on the right track?
#data<-read.csv(file.choose(), header=TRUE) #RIGHTTRK.csv
#TRUE/FALSE spelled out instead of the reassignable aliases T/F
data<-read.csv("RIGHTTRK.csv",header=TRUE)
#view the data: right-track series, then employment overlaid on a second
#y-axis via par(new=TRUE)
plot(y=data$righttrk, x=data$time, type='l')
par(new=TRUE)
plot(y=data$Employ, x=data$time,
type='l',lty=2, xlab="", ylab="",axes=FALSE)
axis(4)
#identify ARIMA models
ccf(y=data$Employ, x=data$righttrk,20,
xlab="Negative means x precedes y. Positive means y precedes x.")
acf(data$Employ,20)
pacf(data$Employ,20)
acf(data$righttrk,20)
pacf(data$righttrk,20)
#estimate ARIMA models (AR(1) for each series, per the ACF/PACF above)
mod.righttrk <- arima(data$righttrk, order=c(1,0,0)); mod.righttrk
##
## Call:
## arima(x = data$righttrk, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.8478 54.9556
## s.e. 0.0667 3.1035
##
## sigma^2 estimated as 17.71: log likelihood = -209.13, aic = 422.26
mod.Employ <- arima(data$Employ, order=c(1,0,0)); mod.Employ
##
## Call:
## arima(x = data$Employ, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.9753 94.874
## s.e. 0.0202 0.500
##
## sigma^2 estimated as 0.02077: log likelihood = 36.33, aic = -68.65
#diagnose ARIMA models: residual ACF/PACF and Ljung-Box for each
acf(mod.righttrk$residuals, 26)
pacf(mod.righttrk$residuals, 26)
Box.test(mod.righttrk$residuals, 26, "Ljung-Box")
##
## Box-Ljung test
##
## data: mod.righttrk$residuals
## X-squared = 33.907, df = 26, p-value = 0.1374
acf(mod.Employ$residuals, 26)
pacf(mod.Employ$residuals, 26)
Box.test(mod.Employ$residuals, 26, "Ljung-Box")
##
## Box-Ljung test
##
## data: mod.Employ$residuals
## X-squared = 22.368, df = 26, p-value = 0.6684
#identify a transfer function: cross-correlate the PREWHITENED series
#(the ARIMA residuals) rather than the raw data
ccf(y=mod.Employ$residuals, x=mod.righttrk$residuals,20,
xlab="Negative means x precedes y. Positive means y precedes x.")
#Perhaps an effect at lag 8 of employment onto right track?
#build the 8-month lag of employment and align the series
employ<-ts(data$Employ)
l8.employ<-lag(employ,-8)
track<-ts(data$righttrk)
data.2<-na.omit(as.data.frame(ts.union(employ,l8.employ,track)))
head(data.2)
## employ l8.employ track
## 9 95.1 94.7 52.246
## 10 95.3 94.8 52.270
## 11 95.4 94.8 51.528
## 12 95.3 94.9 53.929
## 13 95.4 95.1 61.740
## 14 95.4 95.0 64.358
#three candidate transfer specifications on the lagged input:
#tf.1 = decay + concurrent, tf.2 = decay + two numerator terms,
#tf.3 = no decay, two numerator terms (lowest AIC of the three below)
tf.1 <- arimax(data.2$track, order=c(1,0,0),
xtransf=data.2$l8.employ, transfer=list(c(1,0)))
tf.1
##
## Call:
## arimax(x = data.2$track, order = c(1, 0, 0), xtransf = data.2$l8.employ, transfer = list(c(1,
## 0)))
##
## Coefficients:
## ar1 intercept T1-AR1 T1-MA0
## 0.8440 56.7398 -0.3258 -0.0157
## s.e. 0.0722 13.4663 1.7226 0.1663
##
## sigma^2 estimated as 18.72: log likelihood = -188.07, aic = 384.14
tf.2 <- arimax(data.2$track, order=c(1,0,0),
xtransf=data.2$l8.employ, transfer=list(c(1,1)))
## Warning in arimax(data.2$track, order = c(1, 0, 0), xtransf = data.
## 2$l8.employ, : possible convergence problem: optim gave code=1
tf.2
##
## Call:
## arimax(x = data.2$track, order = c(1, 0, 0), xtransf = data.2$l8.employ, transfer = list(c(1,
## 1)))
##
## Coefficients:
## ar1 intercept T1-AR1 T1-MA0 T1-MA1
## 0.8484 34.6533 -0.0011 -3.7618 3.9804
## s.e. 0.1902 1111.5539 0.0122 6.4995 6.5469
##
## sigma^2 estimated as 18.47: log likelihood = -184.77, aic = 379.53
tf.3 <- arimax(data.2$track, order=c(1,0,0),
xtransf=data.2$l8.employ, transfer=list(c(0,1)))
tf.3
##
## Call:
## arimax(x = data.2$track, order = c(1, 0, 0), xtransf = data.2$l8.employ, transfer = list(c(0,
## 1)))
##
## Coefficients:
## ar1 intercept T1-MA0 T1-MA1
## 0.8426 43.5912 -3.6493 3.7770
## s.e. 0.1790 1058.4958 6.2860 6.2499
##
## sigma^2 estimated as 18.51: log likelihood = -184.82, aic = 377.63
#Diagnose our results from the 8-lag model
#([-1] drops the first residual; note the Ljung-Box p = .044 below is
#marginal, so some residual autocorrelation may remain)
acf(tf.3$residuals[-1],26)
pacf(tf.3$residuals[-1],26)
Box.test(tf.3$residuals[-1], 26, "Ljung-Box")
##
## Box-Ljung test
##
## data: tf.3$residuals[-1]
## X-squared = 39.422, df = 26, p-value = 0.04441
#residual cross-correlation; the [-(1:8)] trim aligns the employment
#residuals with the 8 observations lost to lagging
ccf(y=mod.Employ$residuals[-(1:8)], x=tf.3$residuals[-1], 20,
xlab="Negative means x precedes y. Positive means y precedes x.")
############################################################
#front matter
rm(list=ls())
#install.packages("lmtest")
#install.packages("dlnm")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:timeSeries':
##
## time<-
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dlnm)
## This is dlnm 2.3.6. For details: help(dlnm) and vignette('dlnmOverview').
library(foreign)
library(orcutt)
#load data and create lag structure
#(ts objects so stats::lag produces properly aligned lagged series)
bush<-read.dta("BUSHJOB.DTA")
t.s11<-ts(bush$s11)
t.iraq<-ts(bush$iraq)
t.approve<-ts(bush$approve)
lag.approve<-lag(t.approve,-1)
bush.2<-ts.union(t.s11,t.iraq,t.approve,lag.approve)
#run OLS models with and without a lagged DV (static and Koyck)
mod.no.lag<-lm(approve~s11+iraq,data=bush)
summary(mod.no.lag)
##
## Call:
## lm(formula = approve ~ s11 + iraq, data = bush)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.273 -8.627 -3.345 5.477 24.913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.692 1.564 38.166 <2e-16 ***
## s11 17.825 10.948 1.628 0.110
## iraq 2.542 10.948 0.232 0.817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.84 on 47 degrees of freedom
## Multiple R-squared: 0.05414, Adjusted R-squared: 0.01389
## F-statistic: 1.345 on 2 and 47 DF, p-value: 0.2703
mod.lag<-lm(t.approve~lag.approve+t.s11+t.iraq,data=bush.2)
summary(mod.lag)
##
## Call:
## lm(formula = t.approve ~ lag.approve + t.s11 + t.iraq, data = bush.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3815 -1.6173 -0.2157 1.0934 9.8784
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.67096 2.40748 1.525 0.134
## lag.approve 0.93105 0.03916 23.778 < 2e-16 ***
## t.s11 24.16205 2.99376 8.071 2.72e-10 ***
## t.iraq 4.17758 2.98182 1.401 0.168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.949 on 45 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9303, Adjusted R-squared: 0.9256
## F-statistic: 200.1 on 3 and 45 DF, p-value: < 2.2e-16
#run Durbin-Watson and Breusch-Godfried tests
#(static model shows severe serial correlation; the LDV model does not)
dwtest(mod.no.lag)
##
## Durbin-Watson test
##
## data: mod.no.lag
## DW = 0.1932, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(mod.no.lag)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: mod.no.lag
## LM test = 39.419, df = 1, p-value = 3.42e-10
bgtest(mod.lag)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: mod.lag
## LM test = 0.22139, df = 1, p-value = 0.638
#run Cochrane-Orcutt (FGLS correction for AR(1) errors in the static model)
mod.fgls <- cochrane.orcutt(mod.no.lag)
summary(mod.fgls)
## Call:
## lm(formula = approve ~ s11 + iraq, data = bush)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.1730 6.7836 9.018 9.802e-12 ***
## s11 8.7648 3.1380 2.793 0.007582 **
## iraq -1.1825 3.1380 -0.377 0.708017
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.2439 on 46 degrees of freedom
## Multiple R-squared: 0.1473 , Adjusted R-squared: 0.1102
## F-statistic: 4 on 2 and 46 DF, p-value: < 2.563e-02
##
## Durbin-Watson statistic
## (original): 0.19320 , p-value: 7.342e-21
## (transformed): 1.27699 , p-value: 7.012e-03
#The results differ. In this case,
#I'd say that pulse inputs probably require an LDV
#for the full effect to enter the model.
####DYNAMICS####
#Unrestricted distributed lag model
#build lags 1-5 of the September 11 pulse
lag.x<-lag(t.s11,-1)
lag2.x<-lag(t.s11,-2)
lag3.x<-lag(t.s11,-3)
lag4.x<-lag(t.s11,-4)
lag5.x<-lag(t.s11,-5)
bush.3<-ts.union(t.s11,t.iraq,t.approve,lag.approve,
lag.x,lag2.x,lag3.x,lag4.x,lag5.x)
#each lag gets its own free coefficient
mod.unrestricted <-lm(t.approve~t.s11+lag.x+lag2.x+lag3.x+
lag4.x+lag5.x,data=bush.3)
summary(mod.unrestricted)
##
## Call:
## lm(formula = t.approve ~ t.s11 + lag.x + lag2.x + lag3.x + lag4.x +
## lag5.x, data = bush.3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3096 -6.9172 -0.2124 5.4341 18.9708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.613 1.327 43.404 < 2e-16 ***
## t.s11 19.903 8.395 2.371 0.02292 *
## lag.x 26.594 8.395 3.168 0.00303 **
## lag2.x 26.992 8.395 3.215 0.00266 **
## lag3.x 24.977 8.395 2.975 0.00507 **
## lag4.x 22.382 8.395 2.666 0.01121 *
## lag5.x 20.424 8.395 2.433 0.01980 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.289 on 38 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.5288, Adjusted R-squared: 0.4544
## F-statistic: 7.108 on 6 and 38 DF, p-value: 3.965e-05
#Koyck model: the lagged DV imposes geometrically declining lag weights
mod.koyck<-lm(t.approve~lag.approve+t.s11,data=bush.2)
summary(mod.koyck)
##
## Call:
## lm(formula = t.approve ~ lag.approve + t.s11, data = bush.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4769 -1.7113 -0.3193 1.0393 9.7688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.84870 2.42917 1.584 0.12
## lag.approve 0.92955 0.03955 23.504 < 2e-16 ***
## t.s11 24.06438 3.02411 7.958 3.42e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.98 on 46 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9272, Adjusted R-squared: 0.9241
## F-statistic: 293.1 on 2 and 46 DF, p-value: < 2.2e-16
#Comparing coefficients
#unrestricted lag weights: all slopes from the distributed lag model
unrestricted<-mod.unrestricted$coef[-1]
#Koyck implied weights: geometric decay beta*delta^j for lags j = 0..5,
#using the fitted values from mod.koyck above (beta = 24.0644 on t.s11,
#delta = .9296 on the lagged DV); vectorized instead of writing the six
#terms out by hand
koyck<-24.0644*.9296^(0:5)
plot(y=unrestricted,x=c(0:5),ylim=c(0,28),type='h',
col='blue',main="Comparing Effects:Unrestricted in Blue")
#overlay the Koyck weights, offset by .1 so the spikes are distinguishable
#(TRUE/FALSE spelled out instead of the reassignable aliases T/F)
par(new=TRUE)
plot(y=koyck,x=c(0:5+.1),ylim=c(0,28),xlim=c(0,5),
xlab="",ylab="",axes=FALSE,type='h')
abline(h=0, col='gray60')
#Estimate an Almon Model (polynomial distributed lag via dlnm)
#NOTE(review): the vartype=/vardegree= arguments are from an old dlnm API;
#recent dlnm versions use crossbasis(x, argvar=list(fun="poly", degree=2)) --
#confirm against the installed dlnm version before re-running.
basis.s11<-crossbasis(bush$s11, vartype="poly", vardegree=2)
basis.t<-crossbasis(bush$t, vartype="poly", vardegree=2)
mod.almon<-lm(approve~basis.s11+iraq,data=bush)
summary(mod.almon)
##
## Call:
## lm(formula = approve ~ basis.s11 + iraq, data = bush)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.273 -8.627 -3.345 5.477 24.913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.692 1.564 38.166 <2e-16 ***
## basis.s11 22.231 13.655 1.628 0.110
## iraq 2.542 10.948 0.232 0.817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.84 on 47 degrees of freedom
## Multiple R-squared: 0.05414, Adjusted R-squared: 0.01389
## F-statistic: 1.345 on 2 and 47 DF, p-value: 0.2703
#same idea with a polynomial basis in the time index as control
mod.almon.2<-lm(approve~basis.t+s11+iraq,data=bush)
summary(mod.almon.2)
##
## Call:
## lm(formula = approve ~ basis.t + s11 + iraq, data = bush)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.9082 -4.6723 -0.4318 4.0773 19.1607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.328 2.660 26.060 < 2e-16 ***
## basis.t -23.733 5.654 -4.197 0.000122 ***
## s11 11.296 9.538 1.184 0.242369
## iraq 3.003 9.411 0.319 0.751082
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.314 on 46 degrees of freedom
## Multiple R-squared: 0.3161, Adjusted R-squared: 0.2715
## F-statistic: 7.086 on 3 and 46 DF, p-value: 0.0005173
############################################################
##MONTE CARLO CODE##
#What if we have serial correlation and use an LDV?
#How does the Koyck model fare in recovering the truth?
rm(list=ls())
library(TSA)
library(orcutt)
set.seed(10062010)
#number of retained observations
#(renamed from T: assigning to T shadows the logical constant TRUE)
n.obs<-500
#extra 50 draws serve as burn-in and are discarded below
nu<-rnorm(n.obs+50)
d<-rnorm(n.obs+50)
x0 <- rep(NA,n.obs+50)
y0 <- rep(NA,n.obs+50)
e0 <- rep(NA,n.obs+50)
x0[1] <- d[1]
y0[1]<- e0[1] <- nu[1]
#DGP: AR(1) x, AR(1) errors (rho=.1), and a true LDV (.8) with x effect .3
for(t in 2:(n.obs+50)){
x0[t]<- .5*x0[t-1] + d[t]
e0[t]<- .1*e0[t-1] + nu[t]
y0[t]<- .8*y0[t-1] + .3*x0[t] +e0[t]
}
#drop the burn-in draws
y<-ts(y0[51:(n.obs+50)])
x<-ts(x0[51:(n.obs+50)])
time<-c(1:n.obs)
plot(y=y, x=time,type='l',xlab='time',ylab='y')
lines(y=x, x=time, lty=2, col='blue')
axis(4)
mtext("x", side=4)
data2 <- ts.union(y, l.y=lag(y, -1), x)
#LDV estimate (~.85) is close to the true .8; x estimate (~.29) near true .3
mod.1 <- lm(y~l.y+x, data=data2); summary(mod.1)
##
## Call:
## lm(formula = y ~ l.y + x, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.06028 -0.70522 -0.01461 0.65556 2.74562
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02946 0.04498 -0.655 0.513
## l.y 0.85042 0.02153 39.497 < 2e-16 ***
## x 0.29060 0.03956 7.345 8.5e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.002 on 496 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.7718, Adjusted R-squared: 0.7708
## F-statistic: 838.6 on 2 and 496 DF, p-value: < 2.2e-16
###Just autocorrelation with LDV v. FGLS###
rm(list=ls())
#number of retained observations
#(renamed from T: assigning to T shadows the logical constant TRUE)
#NOTE(review): no set.seed() here, so the output below is one particular run
n.obs<-500
nu<-rnorm(n.obs+50)
d<-rnorm(n.obs+50)
x0 <- rep(NA,n.obs+50)
y0 <- rep(NA,n.obs+50)
e0 <- rep(NA,n.obs+50)
x0[1] <- d[1]
y0[1]<- e0[1] <- nu[1]
#DGP: AR(1) errors (rho=.4) but NO lagged DV; true x effect is .3
for(t in 2:(n.obs+50)){
x0[t]<- .5*x0[t-1] + d[t]
e0[t]<- .4*e0[t-1] + nu[t]
y0[t]<- .3*x0[t] + e0[t]
}
#drop the 50 burn-in draws
y<-ts(y0[51:(n.obs+50)])
x<-ts(x0[51:(n.obs+50)])
time<-c(1:n.obs)
plot(y=y, x=time,type='l',xlab='time',ylab='y')
lines(y=x, x=time, lty=2, col='blue')
axis(4)
mtext("x", side=4)
data2 <- ts.union(y, l.y=lag(y, -1), x)
#Koyck (LDV) model: the spurious LDV soaks up the error autocorrelation
mod.koyck <- lm(y~x+l.y, data=data2)
summary(mod.koyck)
##
## Call:
## lm(formula = y ~ x + l.y, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.02492 -0.68798 0.01846 0.63734 2.99190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.008148 0.044946 -0.181 0.856
## x 0.227516 0.038532 5.905 6.57e-09 ***
## l.y 0.327687 0.041033 7.986 9.79e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.003 on 496 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1901, Adjusted R-squared: 0.1869
## F-statistic: 58.23 on 2 and 496 DF, p-value: < 2.2e-16
mod.fgls.lag <- cochrane.orcutt(mod.koyck)
summary(mod.fgls.lag)
## Call:
## lm(formula = y ~ x + l.y, data = data2)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.013375 0.074195 -0.180 0.8570
## x 0.283946 0.043831 6.478 2.244e-10 ***
## l.y -0.049168 0.043197 -1.138 0.2556
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9959 on 495 degrees of freedom
## Multiple R-squared: 0.0796 , Adjusted R-squared: 0.0759
## F-statistic: 21.4 on 2 and 495 DF, p-value: < 1.202e-09
##
## Durbin-Watson statistic
## (original): 1.94459 , p-value: 2.551e-01
## (transformed): 1.98483 , p-value: 4.317e-01
#FGLS on the correctly specified static model recovers x's effect (~.28)
mod.nolag <- lm(y~x, data=data2)
mod.fgls.nolag <- cochrane.orcutt(mod.nolag)
summary(mod.fgls.nolag)
## Call:
## lm(formula = y ~ x, data = data2)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.010121 0.069285 -0.146 0.8839
## x 0.283456 0.043382 6.534 1.589e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9951 on 497 degrees of freedom
## Multiple R-squared: 0.0791 , Adjusted R-squared: 0.0773
## F-statistic: 42.7 on 1 and 497 DF, p-value: < 1.589e-10
##
## Durbin-Watson statistic
## (original): 1.28523 , p-value: 4.806e-16
## (transformed): 1.99836 , p-value: 4.9e-01
#side-by-side coefficients: Koyck OLS, FGLS with LDV, FGLS without LDV
#(last row is the estimated rho; NA where not applicable)
cbind(c(mod.koyck$coefficients,NA),
c(mod.fgls.lag$coefficients,mod.fgls.lag$rho),
c(mod.fgls.nolag$coefficients,NA,mod.fgls.nolag$rho))
## [,1] [,2] [,3]
## (Intercept) -0.008148304 -0.01337517 -0.01012083
## x 0.227515899 0.28394573 0.28345581
## l.y 0.327687496 -0.04916794 NA
## NA 0.39834895 0.35677715
###Just functional form with LDV v. FGLS###
rm(list=ls())
#number of retained observations
#(renamed from T: assigning to T shadows the logical constant TRUE)
#NOTE(review): unseeded, so the output below is one particular run
n.obs<-500
nu<-rnorm(n.obs+50)
d<-rnorm(n.obs+50)
x0 <- rep(NA,n.obs+50)
y0 <- rep(NA,n.obs+50)
e0 <- rep(NA,n.obs+50)
x0[1] <- d[1]
y0[1]<- e0[1] <- nu[1]
#DGP: white-noise errors but a TRUE lagged DV (.4); x effect is .3
for(t in 2:(n.obs+50)){
x0[t]<- .5*x0[t-1] + d[t]
e0[t]<- nu[t]
y0[t]<- .4*y0[t-1] + .3*x0[t] + e0[t]
}
#drop the 50 burn-in draws
y<-ts(y0[51:(n.obs+50)])
x<-ts(x0[51:(n.obs+50)])
time<-c(1:n.obs)
plot(y=y, x=time,type='l',xlab='time',ylab='y')
lines(y=x, x=time, lty=2, col='blue')
axis(4)
mtext("x", side=4)
data2 <- ts.union(y, l.y=lag(y, -1), x)
#correctly specified Koyck model recovers both the LDV (~.43) and x (~.24)
mod.koyck <- lm(y~x+l.y, data=data2)
summary(mod.koyck)
##
## Call:
## lm(formula = y ~ x + l.y, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9351 -0.6734 -0.0619 0.7154 3.7746
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01251 0.04364 0.287 0.774
## x 0.23714 0.03712 6.388 3.87e-10 ***
## l.y 0.43292 0.03865 11.201 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9744 on 496 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2961, Adjusted R-squared: 0.2933
## F-statistic: 104.3 on 2 and 496 DF, p-value: < 2.2e-16
mod.fgls.lag <- cochrane.orcutt(mod.koyck)
summary(mod.fgls.lag)
## Call:
## lm(formula = y ~ x + l.y, data = data2)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.013523 0.046736 0.289 0.7724
## x 0.243893 0.038168 6.390 3.834e-10 ***
## l.y 0.384448 0.039722 9.678 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9748 on 495 degrees of freedom
## Multiple R-squared: 0.2527 , Adjusted R-squared: 0.2497
## F-statistic: 83.7 on 2 and 495 DF, p-value: < 4.868e-32
##
## Durbin-Watson statistic
## (original): 1.96535 , p-value: 3.337e-01
## (transformed): 2.00304 , p-value: 4.987e-01
#omitting the LDV leaves serial correlation for Cochrane-Orcutt to absorb
mod.nolag <- lm(y~x, data=data2)
mod.fgls.nolag <- cochrane.orcutt(mod.nolag)
summary(mod.fgls.nolag)
## Call:
## lm(formula = y ~ x, data = data2)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.016079 0.079074 0.203 0.839
## x 0.247686 0.041722 5.937 5.466e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9797 on 497 degrees of freedom
## Multiple R-squared: 0.0662 , Adjusted R-squared: 0.0643
## F-statistic: 35.2 on 1 and 497 DF, p-value: < 5.466e-09
##
## Durbin-Watson statistic
## (original): 1.13855 , p-value: 1.953e-22
## (transformed): 1.98685 , p-value: 4.41e-01
#side-by-side coefficients: Koyck OLS, FGLS with LDV, FGLS without LDV
#(last row is the estimated rho; NA where not applicable)
cbind(c(mod.koyck$coefficients,NA),
c(mod.fgls.lag$coefficients,mod.fgls.lag$rho),
c(mod.fgls.nolag$coefficients,NA,mod.fgls.nolag$rho))
## [,1] [,2] [,3]
## (Intercept) 0.01251197 0.01352252 0.01607909
## x 0.23713503 0.24389315 0.24768556
## l.y 0.43292175 0.38444771 NA
## NA 0.06489019 0.44526542
##############################################################
#estimating two-step Aitken model.
#front matter
#NOTE(review): rm(list=ls()) wipes the entire workspace; kept because each
#section of this replication script is meant to run standalone
rm(list=ls())
#install.packages("lmtest")
#install.packages("dlnm")
library(lmtest)
library(dlnm)
library(foreign)
library(orcutt)
library(dyn)
#load data and declare as time series
#requires QJPS113.dta in the working directory
qjps<-read.dta("QJPS113.dta")
ts.qjps<-ts(qjps)
#graph the data
#widen the right margin to make room for the second y-axis
par(mar=c(5,4,4,4))
plot(y=qjps$vi,x=qjps$time,type='l',
xlab="Month",ylab="Vote Intention (Solid Black Line)",axes=F)
axis(1,at=seq(456,552,12),labels=c(1998:2006))
axis(2); box()
#par(new=T) overlays the exchange-rate series on the same plot region,
#labeled on the right-hand axis
#NOTE(review): axes=F / par(new=T) use the T/F shorthand; TRUE/FALSE is safer
par(new=T)
plot(y=qjps$xrlag,x=qjps$time,type='l',lty=2,
col='red',xlab="",ylab="",axes=F)
axis(4)
mtext("Exchange Rate (Red Dashed Line)",4,line=2.5)
#OLS with LDV
#dyn$lm allows lag() terms directly in the formula for ts data
table.3.1.1<-dyn$lm(vi~lag(vi,-1)+usxr,data=ts.qjps)
summary(table.3.1.1)
##
## Call:
## lm(formula = dyn(vi ~ lag(vi, -1) + usxr), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.1426 -1.4956 0.1575 1.4030 13.5110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1317 3.6498 0.584 0.56
## lag(vi, -1) 0.8187 0.0577 14.188 <2e-16 ***
## usxr 10.4187 7.0063 1.487 0.14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.303 on 109 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.7456, Adjusted R-squared: 0.7409
## F-statistic: 159.7 on 2 and 109 DF, p-value: < 2.2e-16
#Breusch-Godfrey test for residual serial correlation in the LDV model
lmtest::bgtest(table.3.1.1)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: table.3.1.1
## LM test = 5.8403, df = 1, p-value = 0.01566
#Modeling lag as a function of lagged exchange rate (instrumental variable)
#first stage: regress the lagged DV on the instrument set
iv.step<-dyn$lm(lag(vi,-1)~lag(usxr,-1)+usxr,data=ts.qjps)
summary(iv.step)
##
## Call:
## lm(formula = dyn(lag(vi, -1) ~ lag(usxr, -1) + usxr), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.947 -3.508 -1.250 3.619 14.670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.587 6.094 1.245 0.2158
## lag(usxr, -1) -26.544 45.563 -0.583 0.5614
## usxr 91.202 45.065 2.024 0.0454 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.474 on 109 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2939, Adjusted R-squared: 0.2809
## F-statistic: 22.68 on 2 and 109 DF, p-value: 5.817e-09
#store the first-stage fitted values; the leading NA re-aligns them with the
#original series (one observation is lost to the lag)
qjps$l.vi.hat<-c(NA,iv.step$fitted.values)
#reset as time series
ts.qjps<-ts(qjps)
#Step 2 Regression
#NOTE(review): these second-stage standard errors are not adjusted for the
#generated regressor l.vi.hat
table.3.1.2<-dyn$lm(vi~l.vi.hat+usxr,data=ts.qjps)
summary(table.3.1.2)
##
## Call:
## lm(formula = dyn(vi ~ l.vi.hat + usxr), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.319 -3.509 -1.339 3.980 12.777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.1509 13.6871 0.742 0.460
## l.vi.hat -0.3258 1.7472 -0.186 0.852
## usxr 85.4651 115.0038 0.743 0.459
##
## Residual standard error: 5.572 on 109 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.276, Adjusted R-squared: 0.2628
## F-statistic: 20.78 on 2 and 109 DF, p-value: 2.262e-08
#serial-correlation check on the second-stage model
bgtest(table.3.1.2)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: table.3.1.2
## LM test = 75.287, df = 1, p-value < 2.2e-16
#get the right residuals
#rebuild vi, its lag, and usxr as aligned ts columns
vi<-ts(qjps$vi)
l.vi<-lag(vi,-1)
usxr<-ts(qjps$usxr)
resid.data<-as.data.frame(ts.union(vi,l.vi,usxr))
#r1<-qjps$vi-10.1509+0.3258*qjps$vilag-85.4651*qjps$usxr
#NOTE(review): the structural residuals use hard-coded coefficients copied
#from the table.3.1.2 printout above; these must be updated if the data or
#model change
r2<-resid.data$vi-10.1509+0.3258*resid.data$l.vi-85.4651*resid.data$usxr
#What's the rho term?
#AR(1) fit of the residual series as one estimate of rho
arima(r2,order=c(1,0,0))
##
## Call:
## arima(x = r2, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.8953 0.4381
## s.e. 0.0411 2.6426
##
## sigma^2 estimated as 9.84: log likelihood = -287.77, aic = 579.54
#lag-1 sample autocorrelation of the structural residuals, used as rho for
#generalized differencing below.
#BUG FIX: acf()$acf[[1]] is the lag-0 autocorrelation, which is identically 1;
#the lag-1 value -- matching the recorded 0.8842 and the AR(1) estimate
#(0.8953) above -- is element [[2]]. With [[1]], rho=1 and the generalized
#differences would silently degrade to plain first differences.
rho<-acf(na.omit(r2),1)$acf[[2]];rho
## [1] 0.8842049
#generalized (quasi-)differences: g_t = z_t - rho * z_{t-1}; the first
#observation has no lag available, so it stays NA.
#Vectorized form -- arithmetically identical to the original elementwise
#for-loop, but avoids n single-element data-frame assignments.
n.obs<-nrow(resid.data)
resid.data$g.vi<-c(NA,resid.data$vi[-1]-rho*resid.data$vi[-n.obs])
resid.data$g.usxr<-c(NA,resid.data$usxr[-1]-rho*resid.data$usxr[-n.obs])
#final model
#drop the first row (NA from lagging/differencing) and row 114
#NOTE(review): row 114 appears to be the trailing padding row created by
#ts.union's lag alignment -- confirm against the data
resid.data<-ts(resid.data[-c(1,114),])
#refit the LDV model on the generalized differences
table.3.1.3<-dyn$lm(g.vi~lag(g.vi,-1)+g.usxr,data=resid.data)
summary(table.3.1.3)
##
## Call:
## lm(formula = dyn(g.vi ~ lag(g.vi, -1) + g.usxr), data = resid.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9858 -1.5457 0.0666 1.5558 7.1528
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.32440 1.60952 3.929 0.000151 ***
## lag(g.vi, -1) -0.23718 0.08625 -2.750 0.006989 **
## g.usxr 3.73693 22.41442 0.167 0.867902
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.98 on 108 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.06576, Adjusted R-squared: 0.04846
## F-statistic: 3.801 on 2 and 108 DF, p-value: 0.0254
############################################################
#front matter
rm(list=ls())
#install.packages("lmtest")
#install.packages("dlnm")
library(lmtest)
library(dlnm)
library(foreign)
library(orcutt)
library(dyn)
library(systemfit)
## Loading required package: Matrix
## Loading required package: car
## Loading required package: carData
##
## Please cite the 'systemfit' package as:
## Arne Henningsen and Jeff D. Hamann (2007). systemfit: A Package for Estimating Systems of Simultaneous Equations in R. Journal of Statistical Software 23(4), 1-40. http://www.jstatsoft.org/v23/i04/.
##
## If you have questions, suggestions, or comments regarding the 'systemfit' package, please use a forum or 'tracker' at systemfit's R-Forge site:
## https://r-forge.r-project.org/projects/systemfit/
#load data and declare as time series
#NOTE(review): this section reads "QJPS.dta" while the previous section read
#"QJPS113.dta" -- confirm the intended file
qjps<-read.dta("QJPS.dta")
#ts.qjps is built from the FULL sample before the subset below; the Granger
#tests later in this section use the full series, while systemfit uses the
#subset data frame
ts.qjps<-ts(qjps)
qjps<-qjps[qjps$n>157,]
###Simultaneous Equation Model###
#equation 1: consumer prices; equation 2: interest rate
s1<-cpi~ir+usxr+xrlag1
s2<-ir~cpi+pm+pmlag1
#instrument set: the exogenous variables of both equations
inst <- ~ usxr+xrlag1+pm+pmlag1
#equation-by-equation OLS (ignores simultaneity; baseline for comparison)
table.4.3.1<-systemfit(list(cpi.mod=s1,ir.mod=s2),data=qjps,method="OLS")
summary(table.4.3.1)
##
## systemfit results
## method: OLS
##
## N DF SSR detRCov OLS-R2 McElroy-R2
## system 226 218 1307.72 5.18244 0.700934 0.814598
##
## N DF SSR MSE RMSE R2 Adj R2
## cpi.mod 113 109 1237.2834 11.351224 3.369158 0.707024 0.698961
## ir.mod 113 109 70.4373 0.646214 0.803874 0.528911 0.515945
##
## The covariance matrix of the residuals
## cpi.mod ir.mod
## cpi.mod 11.35122 1.467270
## ir.mod 1.46727 0.646214
##
## The correlations of the residuals
## cpi.mod ir.mod
## cpi.mod 1.000000 0.541752
## ir.mod 0.541752 1.000000
##
##
## OLS estimates for 'cpi.mod' (equation 1)
## Model Formula: cpi ~ ir + usxr + xrlag1
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 156.017568 3.953925 39.45891 < 2e-16 ***
## ir -3.701117 0.279837 -13.22599 < 2e-16 ***
## usxr -8.358601 28.150272 -0.29693 0.76709
## xrlag1 -43.667341 28.405818 -1.53727 0.12713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.369158 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109
## SSR: 1237.283423 MSE: 11.351224 Root MSE: 3.369158
## Multiple R-Squared: 0.707024 Adjusted R-Squared: 0.698961
##
##
## OLS estimates for 'ir.mod' (equation 2)
## Model Formula: ir ~ cpi + pm + pmlag1
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03452875 3.34720513 3.89415 0.00017024 ***
## cpi -0.08650602 0.02682997 -3.22423 0.00166699 **
## pm 0.01959371 0.01600461 1.22425 0.22349572
## pmlag1 0.00580082 0.01439498 0.40298 0.68775579
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.803874 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109
## SSR: 70.437308 MSE: 0.646214 Root MSE: 0.803874
## Multiple R-Squared: 0.528911 Adjusted R-Squared: 0.515945
#2SLS: each equation estimated with the excluded exogenous variables as
#instruments for the endogenous right-hand-side regressor
table.4.3.2<-systemfit(list(cpi.mod=s1,ir.mod=s2),
inst=inst,data=qjps,method="2SLS")
summary(table.4.3.2)
##
## systemfit results
## method: 2SLS
##
## N DF SSR detRCov OLS-R2 McElroy-R2
## system 226 218 1955.4 28.4058 0.552815 0.515335
##
## N DF SSR MSE RMSE R2 Adj R2
## cpi.mod 113 109 1739.606 15.95969 3.99496 0.588080 0.576743
## ir.mod 113 109 215.793 1.97975 1.40704 -0.443237 -0.482959
##
## The covariance matrix of the residuals
## cpi.mod ir.mod
## cpi.mod 15.95969 1.78617
## ir.mod 1.78617 1.97975
##
## The correlations of the residuals
## cpi.mod ir.mod
## cpi.mod 1.000000 0.317764
## ir.mod 0.317764 1.000000
##
##
## 2SLS estimates for 'cpi.mod' (equation 1)
## Model Formula: cpi ~ ir + usxr + xrlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 164.346777 4.887073 33.62888 < 2e-16 ***
## ir -5.562665 0.452939 -12.28126 < 2e-16 ***
## usxr 23.681169 33.798162 0.70066 0.485005
## xrlag1 -73.811562 34.050006 -2.16774 0.032354 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.994957 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109
## SSR: 1739.605673 MSE: 15.959685 Root MSE: 3.994957
## Multiple R-Squared: 0.58808 Adjusted R-Squared: 0.576743
##
##
## 2SLS estimates for 'ir.mod' (equation 2)
## Model Formula: ir ~ cpi + pm + pmlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -36.9937677 19.2515547 -1.92160 0.0572675 .
## cpi 0.3158850 0.1547962 2.04065 0.0437021 *
## pm 0.1369491 0.0513350 2.66775 0.0088007 **
## pmlag1 0.0564373 0.0312946 1.80342 0.0740852 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.407037 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109
## SSR: 215.793103 MSE: 1.979753 Root MSE: 1.407037
## Multiple R-Squared: -0.443237 Adjusted R-Squared: -0.482959
#3SLS: adds FGLS across equations, exploiting the cross-equation residual
#correlation reported in the 2SLS output above
table.4.3.3<-systemfit(list(cpi.mod=s1,ir.mod=s2),
inst=inst,data=qjps,method="3SLS")
summary(table.4.3.3)
##
## systemfit results
## method: 3SLS
##
## N DF SSR detRCov OLS-R2 McElroy-R2
## system 226 218 1950.99 28.2562 0.553823 0.517459
##
## N DF SSR MSE RMSE R2 Adj R2
## cpi.mod 113 109 1735.547 15.92245 3.99029 0.589041 0.577730
## ir.mod 113 109 215.442 1.97653 1.40589 -0.440889 -0.480547
##
## The covariance matrix of the residuals used for estimation
## cpi.mod ir.mod
## cpi.mod 15.95969 1.78617
## ir.mod 1.78617 1.97975
##
## The covariance matrix of the residuals
## cpi.mod ir.mod
## cpi.mod 15.92245 1.79305
## ir.mod 1.79305 1.97653
##
## The correlations of the residuals
## cpi.mod ir.mod
## cpi.mod 1.00000 0.31962
## ir.mod 0.31962 1.00000
##
##
## 3SLS estimates for 'cpi.mod' (equation 1)
## Model Formula: cpi ~ ir + usxr + xrlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 164.15753 4.88292 33.61872 < 2e-16 ***
## ir -5.55250 0.45281 -12.26231 < 2e-16 ***
## usxr 13.84620 32.13604 0.43086 0.667419
## xrlag1 -63.76429 32.32692 -1.97248 0.051087 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.990294 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109
## SSR: 1735.547016 MSE: 15.92245 Root MSE: 3.990294
## Multiple R-Squared: 0.589041 Adjusted R-Squared: 0.57773
##
##
## 3SLS estimates for 'ir.mod' (equation 2)
## Model Formula: ir ~ cpi + pm + pmlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -36.9294610 19.2488123 -1.91853 0.0576594 .
## cpi 0.3153880 0.1547759 2.03771 0.0440009 *
## pm 0.1382749 0.0508960 2.71681 0.0076681 **
## pmlag1 0.0548498 0.0302491 1.81327 0.0725421 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.405892 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109
## SSR: 215.442046 MSE: 1.976533 Root MSE: 1.405892
## Multiple R-Squared: -0.440889 Adjusted R-Squared: -0.480547
###Granger test: 12 lags###
#restricted model: cpi on its own 12 lags only (note: uses ts.qjps, the
#full pre-subset series)
cpi.lag.only<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
lag(cpi,-5)+lag(cpi,-6)+lag(cpi,-7)+lag(cpi,-8)+
lag(cpi,-9)+lag(cpi,-10)+lag(cpi,-11)+lag(cpi,-12),
data=ts.qjps)
summary(cpi.lag.only)
##
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi,
## -3) + lag(cpi, -4) + lag(cpi, -5) + lag(cpi, -6) + lag(cpi,
## -7) + lag(cpi, -8) + lag(cpi, -9) + lag(cpi, -10) + lag(cpi,
## -11) + lag(cpi, -12)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.99402 -0.17105 -0.01832 0.16813 1.81805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.41291 0.13232 3.120 0.00202 **
## lag(cpi, -1) 1.11975 0.06382 17.547 < 2e-16 ***
## lag(cpi, -2) -0.11177 0.09587 -1.166 0.24483
## lag(cpi, -3) -0.14885 0.09542 -1.560 0.12009
## lag(cpi, -4) 0.15668 0.09502 1.649 0.10042
## lag(cpi, -5) 0.01745 0.09553 0.183 0.85517
## lag(cpi, -6) 0.15824 0.09439 1.676 0.09494 .
## lag(cpi, -7) -0.24562 0.09442 -2.601 0.00985 **
## lag(cpi, -8) 0.06259 0.09557 0.655 0.51313
## lag(cpi, -9) -0.20611 0.09574 -2.153 0.03232 *
## lag(cpi, -10) 0.18972 0.09684 1.959 0.05124 .
## lag(cpi, -11) 0.04116 0.09735 0.423 0.67279
## lag(cpi, -12) -0.03535 0.06449 -0.548 0.58416
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3328 on 245 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9996
## F-statistic: 5.999e+04 on 12 and 245 DF, p-value: < 2.2e-16
#unrestricted model: cpi on 12 own lags plus 12 lags of the interest rate;
#the block F-test below compares this against cpi.lag.only
cpi.full<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
lag(cpi,-5)+lag(cpi,-6)+lag(cpi,-7)+lag(cpi,-8)+
lag(cpi,-9)+lag(cpi,-10)+lag(cpi,-11)+lag(cpi,-12)+
lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+lag(ir,-5)+
lag(ir,-6)+lag(ir,-7)+lag(ir,-8)+lag(ir,-9)+lag(ir,-10)+
lag(ir,-11)+lag(ir,-12),data=ts.qjps)
summary(cpi.full)
##
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi,
## -3) + lag(cpi, -4) + lag(cpi, -5) + lag(cpi, -6) + lag(cpi,
## -7) + lag(cpi, -8) + lag(cpi, -9) + lag(cpi, -10) + lag(cpi,
## -11) + lag(cpi, -12) + lag(ir, -1) + lag(ir, -2) + lag(ir,
## -3) + lag(ir, -4) + lag(ir, -5) + lag(ir, -6) + lag(ir, -7) +
## lag(ir, -8) + lag(ir, -9) + lag(ir, -10) + lag(ir, -11) +
## lag(ir, -12)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.89517 -0.17484 -0.00708 0.16823 1.53117
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.634293 0.271712 -2.334 0.02042 *
## lag(cpi, -1) 0.983199 0.065865 14.927 < 2e-16 ***
## lag(cpi, -2) -0.097529 0.092011 -1.060 0.29025
## lag(cpi, -3) -0.130793 0.091586 -1.428 0.15460
## lag(cpi, -4) 0.136000 0.091452 1.487 0.13833
## lag(cpi, -5) -0.002999 0.091925 -0.033 0.97400
## lag(cpi, -6) 0.154568 0.090882 1.701 0.09032 .
## lag(cpi, -7) -0.232758 0.091303 -2.549 0.01144 *
## lag(cpi, -8) 0.062402 0.092556 0.674 0.50085
## lag(cpi, -9) -0.147114 0.092663 -1.588 0.11373
## lag(cpi, -10) 0.140856 0.093565 1.505 0.13357
## lag(cpi, -11) 0.048085 0.093630 0.514 0.60804
## lag(cpi, -12) 0.093170 0.067274 1.385 0.16740
## lag(ir, -1) 0.110335 0.036018 3.063 0.00245 **
## lag(ir, -2) 0.012948 0.044259 0.293 0.77012
## lag(ir, -3) 0.013603 0.042807 0.318 0.75095
## lag(ir, -4) -0.080518 0.040882 -1.970 0.05008 .
## lag(ir, -5) 0.037984 0.040424 0.940 0.34838
## lag(ir, -6) 0.025923 0.040154 0.646 0.51917
## lag(ir, -7) 0.001241 0.040575 0.031 0.97564
## lag(ir, -8) 0.007939 0.040592 0.196 0.84511
## lag(ir, -9) -0.013999 0.038523 -0.363 0.71664
## lag(ir, -10) 0.038416 0.038208 1.005 0.31573
## lag(ir, -11) -0.043406 0.036749 -1.181 0.23875
## lag(ir, -12) -0.032090 0.028373 -1.131 0.25922
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3126 on 233 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 3.4e+04 on 24 and 233 DF, p-value: < 2.2e-16
#block F-test: do the 12 ir lags jointly improve the cpi equation?
#significant F (p = 3.666e-05 below) => ir Granger-causes cpi
anova(cpi.full,cpi.lag.only)
## Analysis of Variance Table
##
## Model 1: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) +
## lag(cpi, -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi, -8) +
## lag(cpi, -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi, -12) +
## lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) + lag(ir,
## -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, -8) + lag(ir, -9) +
## lag(ir, -10) + lag(ir, -11) + lag(ir, -12)
## Model 2: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) +
## lag(cpi, -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi, -8) +
## lag(cpi, -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi, -12)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 233 22.773
## 2 245 27.142 -12 -4.369 3.725 3.666e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#mirror-image test: restricted model for the interest rate (own 12 lags only)
ir.lag.only<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+
lag(ir,-5)+lag(ir,-6)+lag(ir,-7)+lag(ir,-8)+
lag(ir,-9)+lag(ir,-10)+lag(ir,-11)+lag(ir,-12),
data=ts.qjps)
summary(ir.lag.only)
##
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) +
## lag(ir, -4) + lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir,
## -8) + lag(ir, -9) + lag(ir, -10) + lag(ir, -11) + lag(ir,
## -12)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.15071 -0.24305 -0.01453 0.20893 2.43682
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.138286 0.094218 1.468 0.14346
## lag(ir, -1) 0.809780 0.063428 12.767 < 2e-16 ***
## lag(ir, -2) 0.234107 0.081857 2.860 0.00460 **
## lag(ir, -3) -0.102744 0.079026 -1.300 0.19478
## lag(ir, -4) 0.019182 0.075635 0.254 0.80001
## lag(ir, -5) 0.111522 0.074239 1.502 0.13433
## lag(ir, -6) -0.202014 0.073754 -2.739 0.00661 **
## lag(ir, -7) 0.105702 0.074590 1.417 0.15772
## lag(ir, -8) 0.007061 0.074721 0.095 0.92479
## lag(ir, -9) -0.003964 0.070756 -0.056 0.95537
## lag(ir, -10) 0.095079 0.070195 1.354 0.17683
## lag(ir, -11) -0.004996 0.067811 -0.074 0.94133
## lag(ir, -12) -0.092204 0.051588 -1.787 0.07512 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5886 on 245 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.9706, Adjusted R-squared: 0.9692
## F-statistic: 674.2 on 12 and 245 DF, p-value: < 2.2e-16
#unrestricted model: ir on 12 own lags plus 12 lags of cpi
ir.full<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+
lag(ir,-5)+lag(ir,-6)+lag(ir,-7)+lag(ir,-8)+
lag(ir,-9)+lag(ir,-10)+lag(ir,-11)+lag(ir,-12)+
lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
lag(cpi,-5)+lag(cpi,-6)+lag(cpi,-7)+lag(cpi,-8)+
lag(cpi,-9)+lag(cpi,-10)+lag(cpi,-11)+lag(cpi,-12),
data=ts.qjps)
summary(ir.full)
##
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) +
## lag(ir, -4) + lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir,
## -8) + lag(ir, -9) + lag(ir, -10) + lag(ir, -11) + lag(ir,
## -12) + lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi,
## -4) + lag(cpi, -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi,
## -8) + lag(cpi, -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi,
## -12)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.16549 -0.26191 0.00632 0.25444 2.30835
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.629006 0.497254 3.276 0.00121 **
## lag(ir, -1) 0.726639 0.065915 11.024 < 2e-16 ***
## lag(ir, -2) 0.208562 0.080997 2.575 0.01064 *
## lag(ir, -3) -0.090662 0.078340 -1.157 0.24834
## lag(ir, -4) 0.006675 0.074817 0.089 0.92898
## lag(ir, -5) 0.084879 0.073980 1.147 0.25242
## lag(ir, -6) -0.186244 0.073485 -2.534 0.01192 *
## lag(ir, -7) 0.093743 0.074255 1.262 0.20805
## lag(ir, -8) 0.010252 0.074286 0.138 0.89036
## lag(ir, -9) 0.034446 0.070499 0.489 0.62559
## lag(ir, -10) 0.076992 0.069924 1.101 0.27199
## lag(ir, -11) -0.008874 0.067254 -0.132 0.89514
## lag(ir, -12) -0.078914 0.051925 -1.520 0.12993
## lag(cpi, -1) 0.195327 0.120538 1.620 0.10649
## lag(cpi, -2) 0.076149 0.168387 0.452 0.65153
## lag(cpi, -3) -0.092792 0.167609 -0.554 0.58037
## lag(cpi, -4) 0.108255 0.167364 0.647 0.51838
## lag(cpi, -5) 0.021274 0.168230 0.126 0.89948
## lag(cpi, -6) -0.235922 0.166322 -1.418 0.15739
## lag(cpi, -7) 0.090293 0.167093 0.540 0.58945
## lag(cpi, -8) -0.221096 0.169385 -1.305 0.19308
## lag(cpi, -9) 0.196854 0.169582 1.161 0.24690
## lag(cpi, -10) 0.051456 0.171232 0.301 0.76406
## lag(cpi, -11) -0.175315 0.171350 -1.023 0.30730
## lag(cpi, -12) -0.027991 0.123116 -0.227 0.82035
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5721 on 233 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.9736, Adjusted R-squared: 0.9709
## F-statistic: 357.9 on 24 and 233 DF, p-value: < 2.2e-16
#block F-test: do the 12 cpi lags jointly improve the ir equation?
#significant at 5% (p = 0.01289 below) => cpi Granger-causes ir
anova(ir.full,ir.lag.only)
## Analysis of Variance Table
##
## Model 1: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) +
## lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, -8) + lag(ir,
## -9) + lag(ir, -10) + lag(ir, -11) + lag(ir, -12) + lag(cpi,
## -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) + lag(cpi,
## -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi, -8) + lag(cpi,
## -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi, -12)
## Model 2: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) +
## lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, -8) + lag(ir,
## -9) + lag(ir, -10) + lag(ir, -11) + lag(ir, -12)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 233 76.273
## 2 245 84.878 -12 -8.6051 2.1906 0.01289 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###Granger test: 4 lags###
#robustness check with a shorter lag order: restricted cpi model, 4 own lags
cpi.lag.only.4<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4),
data=ts.qjps)
summary(cpi.lag.only.4)
##
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi,
## -3) + lag(cpi, -4)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9462 -0.1850 -0.0174 0.1911 1.9337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.46440 0.10808 4.297 2.45e-05 ***
## lag(cpi, -1) 1.12383 0.06008 18.707 < 2e-16 ***
## lag(cpi, -2) -0.13534 0.09095 -1.488 0.137949
## lag(cpi, -3) -0.22444 0.09108 -2.464 0.014382 *
## lag(cpi, -4) 0.23366 0.06005 3.891 0.000127 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3407 on 261 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 1.912e+05 on 4 and 261 DF, p-value: < 2.2e-16
#unrestricted 4-lag cpi model: adds 4 lags of the interest rate
cpi.full.4<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4),
data=ts.qjps)
summary(cpi.full.4)
##
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi,
## -3) + lag(cpi, -4) + lag(ir, -1) + lag(ir, -2) + lag(ir,
## -3) + lag(ir, -4)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94473 -0.16541 -0.01108 0.16659 1.79147
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.28097 0.22790 -1.233 0.218744
## lag(cpi, -1) 1.06963 0.05895 18.146 < 2e-16 ***
## lag(cpi, -2) -0.12067 0.08756 -1.378 0.169343
## lag(cpi, -3) -0.23088 0.08783 -2.629 0.009090 **
## lag(cpi, -4) 0.28524 0.05918 4.820 2.46e-06 ***
## lag(ir, -1) 0.10257 0.02961 3.464 0.000622 ***
## lag(ir, -2) -0.03079 0.03844 -0.801 0.423779
## lag(ir, -3) 0.03624 0.03714 0.976 0.330154
## lag(ir, -4) -0.06980 0.02838 -2.459 0.014588 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.326 on 257 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 1.045e+05 on 8 and 257 DF, p-value: < 2.2e-16
#block F-test at 4 lags: ir lags jointly significant for cpi
#(p = 2.17e-05 below), consistent with the 12-lag result
anova(cpi.full.4,cpi.lag.only.4)
## Analysis of Variance Table
##
## Model 1: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) +
## lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4)
## Model 2: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 257 27.313
## 2 261 30.304 -4 -2.9912 7.0365 2.17e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#restricted 4-lag model for the interest rate
ir.lag.only.4<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4),
data=ts.qjps)
summary(ir.lag.only.4)
##
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) +
## lag(ir, -4)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2707 -0.2835 0.0066 0.2200 4.4356
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10116 0.10103 1.001 0.3176
## lag(ir, -1) 0.81953 0.05842 14.029 <2e-16 ***
## lag(ir, -2) 0.16754 0.07695 2.177 0.0304 *
## lag(ir, -3) -0.02140 0.07450 -0.287 0.7741
## lag(ir, -4) 0.01727 0.05670 0.304 0.7610
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6576 on 261 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.963, Adjusted R-squared: 0.9624
## F-statistic: 1698 on 4 and 261 DF, p-value: < 2.2e-16
#unrestricted 4-lag ir model: adds 4 lags of cpi
ir.full.4<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+
lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4),
data=ts.qjps)
summary(ir.full.4)
##
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) +
## lag(ir, -4) + lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) +
## lag(cpi, -4)), data = ts.qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3024 -0.2824 0.0266 0.2453 4.2411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.15920 0.45550 2.545 0.0115 *
## lag(ir, -1) 0.78797 0.05917 13.316 <2e-16 ***
## lag(ir, -2) 0.16350 0.07682 2.128 0.0343 *
## lag(ir, -3) -0.03079 0.07424 -0.415 0.6787
## lag(ir, -4) 0.01576 0.05673 0.278 0.7813
## lag(cpi, -1) 0.01351 0.11781 0.115 0.9088
## lag(cpi, -2) 0.16169 0.17500 0.924 0.3564
## lag(cpi, -3) -0.03366 0.17555 -0.192 0.8481
## lag(cpi, -4) -0.15051 0.11828 -1.272 0.2043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6516 on 257 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.9642, Adjusted R-squared: 0.9631
## F-statistic: 865.9 on 8 and 257 DF, p-value: < 2.2e-16
#block F-test at 4 lags: cpi lags only marginally significant for ir
#(p = 0.068 below), weaker than the 12-lag result
anova(ir.full.4,ir.lag.only.4)
## Analysis of Variance Table
##
## Model 1: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) +
## lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4)
## Model 2: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 257 109.11
## 2 261 112.87 -4 -3.7584 2.2132 0.06801 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###Doing 2SLS the hard way###
#stage 1: regress the endogenous ir on all exogenous variables/instruments
#(uses the subset data frame qjps, not ts.qjps)
iv.ir<-lm(ir~usxr+xrlag1+pm+pmlag1,data=qjps)
summary(iv.ir)
##
## Call:
## lm(formula = ir ~ usxr + xrlag1 + pm + pmlag1, data = qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.43061 -0.55177 -0.09273 0.50482 1.93529
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.46801 0.88256 6.196 1.08e-08 ***
## usxr 6.64162 6.56012 1.012 0.313598
## xrlag1 -12.45636 6.57247 -1.895 0.060736 .
## pm 0.04789 0.01374 3.485 0.000713 ***
## pmlag1 0.02204 0.01381 1.596 0.113323
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7886 on 108 degrees of freedom
## Multiple R-squared: 0.5508, Adjusted R-squared: 0.5342
## F-statistic: 33.11 on 4 and 108 DF, p-value: < 2.2e-16
#stage 2 for the cpi equation: substitute fitted ir.hat for ir.
#NOTE(review): manual-2SLS standard errors use the wrong residuals -- compare
#the intercept SE 3.3465 here with 4.887 from systemfit's 2SLS above; the
#systemfit SEs are the ones to report
qjps$ir.hat<-iv.ir$fitted.values
cpi.stage.2<-lm(cpi~ir.hat+usxr+xrlag1,data=qjps)
summary(cpi.stage.2)
##
## Call:
## lm(formula = cpi ~ ir.hat + usxr + xrlag1, data = qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6165 -1.8963 -0.3378 1.5081 8.4451
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 164.3468 3.3465 49.109 < 2e-16 ***
## ir.hat -5.5627 0.3102 -17.935 < 2e-16 ***
## usxr 23.6812 23.1441 1.023 0.30848
## xrlag1 -73.8116 23.3166 -3.166 0.00201 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.736 on 109 degrees of freedom
## Multiple R-squared: 0.8068, Adjusted R-squared: 0.8015
## F-statistic: 151.8 on 3 and 109 DF, p-value: < 2.2e-16
#stage 1 for the other equation: regress endogenous cpi on the instruments
iv.cpi<-lm(cpi~usxr+xrlag1+pm+pmlag1,data=qjps)
summary(iv.cpi)
##
## Call:
## lm(formula = cpi ~ usxr + xrlag1 + pm + pmlag1, data = qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1700 -1.8766 -0.4139 1.4373 8.4114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 133.95144 3.07459 43.567 < 2e-16 ***
## usxr -13.26649 22.85356 -0.581 0.5628
## xrlag1 -4.59976 22.89660 -0.201 0.8412
## pm -0.27964 0.04787 -5.841 5.55e-08 ***
## pmlag1 -0.10873 0.04810 -2.260 0.0258 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.747 on 108 degrees of freedom
## Multiple R-squared: 0.807, Adjusted R-squared: 0.7998
## F-statistic: 112.9 on 4 and 108 DF, p-value: < 2.2e-16
#stage 2 for the ir equation: substitute fitted cpi.hat for cpi
qjps$cpi.hat<-iv.cpi$fitted.values
ir.stage.2<-lm(ir~cpi.hat+pm+pmlag1,data=qjps)
#BUG FIX: the original called summary(cpi.stage.2) here, so the second-stage
#ir results were never printed (the transcript below is a byte-for-byte
#duplicate of the earlier cpi.stage.2 output, produced by that typo; it will
#change to the ir.stage.2 summary when re-run)
summary(ir.stage.2)
## (stale transcript from the original summary(cpi.stage.2) call:)
##
## Call:
## lm(formula = cpi ~ ir.hat + usxr + xrlag1, data = qjps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6165 -1.8963 -0.3378 1.5081 8.4451
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 164.3468 3.3465 49.109 < 2e-16 ***
## ir.hat -5.5627 0.3102 -17.935 < 2e-16 ***
## usxr 23.6812 23.1441 1.023 0.30848
## xrlag1 -73.8116 23.3166 -3.166 0.00201 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.736 on 109 degrees of freedom
## Multiple R-squared: 0.8068, Adjusted R-squared: 0.8015
## F-statistic: 151.8 on 3 and 109 DF, p-value: < 2.2e-16
############################################################
#Example direct Granger test in R
#load data and create ts.union
#requires IParmsRace.csv in the working directory
#NOTE(review): the name `data` shadows utils::data for the rest of this
#section; a more specific name would be safer
data<-read.csv('IParmsRace.csv')
y<-as.ts(data$India)
x<-as.ts(data$Pakistan)
#lag(, -k) shifts the series back k periods; ts.union below aligns the
#shifted copies so row t pairs y_t with y_{t-1}, ..., x_{t-4}
l.y<-lag(data$India,-1)
l2.y<-lag(data$India,-2)
l3.y<-lag(data$India,-3)
l4.y<-lag(data$India,-4)
l.x<-lag(data$Pakistan,-1)
l2.x<-lag(data$Pakistan,-2)
l3.x<-lag(data$Pakistan,-3)
l4.x<-lag(data$Pakistan,-4)
data.2<-ts.union(y,x,l.y,l2.y,l3.y,l4.y,l.x,l2.x,l3.x,l4.x,dframe=TRUE)
#I'm going with 4 lags here, but it's up to you!
#Suppose I believe Pakistan's expenditures cause India's.
#Start with linear models of y.
y.with.x<-lm(y~l.y+l2.y+l3.y+l4.y+l.x+l2.x+l3.x+l4.x,data=data.2)
y.without.x<-lm(y~l.y+l2.y+l3.y+l4.y,data=data.2)
#Block F-test that Pakistan informs our conditional expectation of India:
anova(y.with.x, y.without.x)
## Analysis of Variance Table
##
## Model 1: y ~ l.y + l2.y + l3.y + l4.y + l.x + l2.x + l3.x + l4.x
## Model 2: y ~ l.y + l2.y + l3.y + l4.y
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3 453103
## 2 7 1754921 -4 -1301817 2.1548 0.2772
#It's not significant.
#Does India's expenditures cause Pakistan's?
#same block F-test with the roles reversed
x.with.y<-lm(x~l.x+l2.x+l3.x+l4.x+l.y+l2.y+l3.y+l4.y,data=data.2)
x.without.y<-lm(x~l.x+l2.x+l3.x+l4.x,data=data.2)
#Block F-test that India informs our conditional expectation of Pakistan:
anova(x.with.y, x.without.y)
## Analysis of Variance Table
##
## Model 1: x ~ l.x + l2.x + l3.x + l4.x + l.y + l2.y + l3.y + l4.y
## Model 2: x ~ l.x + l2.x + l3.x + l4.x
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3 2254
## 2 7 74298 -4 -72045 23.976 0.01297 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#It's significant, which suggests India's expenditures are
#exogenous to Pakistan's (consistent with Freeman 1983).
#QUICK ROBUSTNESS CHECK: TWO LAGS
#Does Pakistan influence India?
#(reuses/overwrites the 4-lag model objects above)
y.with.x<-lm(y~l.y+l2.y+l.x+l2.x,data=data.2)
y.without.x<-lm(y~l.y+l2.y,data=data.2)
anova(y.with.x, y.without.x)
## Analysis of Variance Table
##
## Model 1: y ~ l.y + l2.y + l.x + l2.x
## Model 2: y ~ l.y + l2.y
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 9 1791150
## 2 11 2746479 -2 -955329 2.4001 0.1461
#Does India Influence Pakistan?
#two-lag version of the reverse-direction test
x.with.y<-lm(x~l.x+l2.x+l.y+l2.y,data=data.2)
x.without.y<-lm(x~l.x+l2.x,data=data.2)
anova(x.with.y, x.without.y)
## Analysis of Variance Table
##
## Model 1: x ~ l.x + l2.x + l.y + l2.y
## Model 2: x ~ l.x + l2.x
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 9 45070
## 2 11 87800 -2 -42730 4.2663 0.04975 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Same findings.
############################################################
#VECTOR AUTOREGRESSION (VAR) EXAMPLE#
#front matter
rm(list=ls())
library(vars)
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: sandwich
## Loading required package: urca
#load data on the Canadian economy: productivity, employment, unemployment, real wage
data(Canada)
#run a VAR model (estimated by OLS, one equation per variable)
var.model <-vars::VAR(Canada, p=2, type="const")
#type could also be "none", "trend", or "both"
#let an information criterion choose the best lag length
#NOTE(review): ic="AIC" selects by AIC, not BIC as the original comment said;
#with lag.max supplied, p=2 is presumably overridden by the selected lag -- see ?VAR.
var.model.2 <-VAR(Canada, p=2, lag.max=10, ic="AIC", type="const")
#Assess the model
plot(var.model)
#test for serial correlation
#(adjusted Portmanteau test on the VAR residuals; null = no serial correlation)
vars::serial.test(var.model, lags.pt = 16, type = "PT.adjusted")
##
## Portmanteau Test (adjusted)
##
## data: Residuals of VAR object var.model
## Chi-squared = 231.59, df = 224, p-value = 0.3497
#Does employment have a Granger causal effect?
#Tests H0: "e" does not Granger-cause the other three series,
#plus a separate instantaneous-causality test.
vars::causality(var.model, cause="e")
## $Granger
##
## Granger causality H0: e do not Granger-cause prod rw U
##
## data: VAR object var.model
## F-Test = 6.2768, df1 = 6, df2 = 292, p-value = 3.206e-06
##
##
## $Instant
##
## H0: No instantaneous causality between: e and prod rw U
##
## data: VAR object var.model
## Chi-squared = 26.068, df = 3, p-value = 9.228e-06
#What's the impulse response if we perturb employment?
#(boot =FALSE skips bootstrapped confidence bands)
var.model.irf <- vars::irf(var.model, impulse = "e",
response = c("prod", "rw", "U"), boot =FALSE)
#Impulse response analysis
plot(var.model.irf)
#the following is necessary if you do not want to use the whole data set in your VAR model:
Canada.2<-as.data.frame(Canada)
#NOTE(review): wrapping columns in ts() this way drops the original quarterly
#start/frequency (defaults to frequency 1) -- harmless for VAR(), but the
#series no longer carry calendar dates.
e<-ts(Canada.2$e)
rw<-ts(Canada.2$rw)
U<-ts(Canada.2$U)
prod<-ts(Canada.2$prod)
#prod is deliberately excluded from the union so it can enter as exogenous below
Canada.3<-ts.union(e,rw,U,dframe=TRUE)
#run a VAR model where productivity is exogenous
var.with.exogenous <-VAR(Canada.3, p=2, type="const", exogen=prod)
## Warning in VAR(Canada.3, p = 2, type = "const", exogen = prod): No column names supplied in exogen, using: exo1 , instead.
####################################################################
###SOURCE: Patrick Brandt's documentation for MSBVAR on CRAN.###
#install.packages("MSBVAR")
#install.packages("bit")
library(MSBVAR)
#load data
data(IsraelPalestineConflict)
#create a vector of variable names
varnames <- colnames(IsraelPalestineConflict)
#specify Bayesian Vector Autoregression Model
#(Sims-Zha-style prior with 6 lags and no exogenous regressors, z=NULL;
#the lambda*/mu* arguments are prior hyperparameters -- see ?szbvar)
fit.BVAR <- szbvar(IsraelPalestineConflict, p=6, z=NULL,
lambda0=0.6, lambda1=0.1,
lambda3=2, lambda4=0.25, lambda5=0, mu5=0,
mu6=0, nu=3, qm=4,
prior=0, posterior.fit=FALSE)
# Draw from the posterior pdf of the impulse responses.
posterior.impulses <- mc.irf(fit.BVAR, nsteps=10, draws=5000)
# Plot the responses (16%/84% posterior bands)
plot(posterior.impulses, method=c("Sims-Zha2"), component=1,
probs=c(0.16,0.84), varnames=varnames)
####################################################################
#UNIT-ROOT TESTS AND ARIMA MODELS: MACROPARTISANSHIP#
#clean up
rm(list=ls())
#load packages
library(foreign)
library(aTSA) #for Dickey-Fuller test
##
## Attaching package: 'aTSA'
## The following object is masked from 'package:vars':
##
## arch.test
## The following object is masked from 'package:graphics':
##
## identify
library(tseries) #for KPSS test
##
## Attaching package: 'tseries'
## The following objects are masked from 'package:aTSA':
##
## adf.test, kpss.test, pp.test
#load data
#(quarterly U.S. series, 160 observations; requires partyid.dta in the working directory)
macro<-read.dta("partyid.dta")
#descriptives and line plots
summary(macro)
## macropart repub democrat time
## Min. :51.00 Min. :20.32 Min. :27.79 Length:160
## 1st Qu.:58.10 1st Qu.:25.07 1st Qu.:41.36 Class :character
## Median :61.15 Median :27.57 Median :44.08 Mode :character
## Mean :61.27 Mean :27.90 Mean :42.90
## 3rd Qu.:65.12 3rd Qu.:31.16 3rd Qu.:45.94
## Max. :69.50 Max. :35.40 Max. :52.82
## qdate consumer papp
## Min. :-28.00 Min. : 47.57 Min. :26.30
## 1st Qu.: 11.75 1st Qu.: 73.63 1st Qu.:47.22
## Median : 51.50 Median : 86.08 Median :57.59
## Mean : 51.50 Mean : 83.30 Mean :56.45
## 3rd Qu.: 91.25 3rd Qu.: 94.92 3rd Qu.:64.83
## Max. :131.00 Max. :106.17 Max. :80.81
#Line plots of each series against the quarterly date index qdate (-28..131).
#Tick marks every 16 quarters, labeled with the year (1953-1992 by 4).
#FIX: spell out axis()'s "labels" argument -- the original used the partially
#matched "label=", which works only via partial argument matching.
plot(y=macro$macropart,x=macro$qdate,type='l',
xlab="Time",ylab="Macropartisanship",axes=F)
axis(1,at=seq(from=-28,to=131,by=16),
labels=seq(from=1953,to=1992,by=4))
axis(2);box()
plot(y=macro$consumer,x=macro$qdate,type='l',
xlab="Time",ylab="Consumer Sentiment",axes=F)
axis(1,at=seq(from=-28,to=131,by=16),
labels=seq(from=1953,to=1992,by=4));axis(2);box()
plot(y=macro$papp,x=macro$qdate,type='l',
xlab="Time",ylab="Presidential Approval",axes=F)
axis(1,at=seq(from=-28,to=131,by=16),
labels=seq(from=1953,to=1992,by=4));axis(2);box()
#Autocorrelation diagnostics for macropartisanship: plot the ACF and PACF
#out to 16 lags, then print the ACF values as text.
acf(x = macro$macropart, lag.max = 16)
pacf(x = macro$macropart, lag.max = 16)
print(acf(x = macro$macropart, lag.max = 16))
##
## Autocorrelations of series 'macro$macropart', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 0.927 0.871 0.831 0.772 0.704 0.652 0.608 0.572 0.559 0.531 0.510 0.495
## 13 14 15 16
## 0.470 0.432 0.394 0.366
#Same autocorrelation diagnostics for consumer sentiment.
acf(x = macro$consumer, lag.max = 16)
pacf(x = macro$consumer, lag.max = 16)
print(acf(x = macro$consumer, lag.max = 16))
##
## Autocorrelations of series 'macro$consumer', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 0.939 0.867 0.794 0.713 0.637 0.571 0.512 0.459 0.422 0.407 0.404 0.401
## 13 14 15 16
## 0.401 0.397 0.389 0.384
#Same autocorrelation diagnostics for presidential approval.
acf(x = macro$papp, lag.max = 16)
pacf(x = macro$papp, lag.max = 16)
print(acf(x = macro$papp, lag.max = 16))
##
## Autocorrelations of series 'macro$papp', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 0.856 0.682 0.537 0.446 0.366 0.305 0.261 0.216 0.205 0.201 0.173 0.151
## 13 14 15 16
## 0.139 0.151 0.190 0.229
#Dickey-Fuller tests (Table 5.2)
#Note: Stata's default is with drift and no trend, zero lags.
#You can adjust this if you feel the need, though.
#tseries version: one ADF statistic with an automatically chosen lag order
tseries::adf.test(macro$macropart)
##
## Augmented Dickey-Fuller Test
##
## data: macro$macropart
## Dickey-Fuller = -2.1515, Lag order = 5, p-value = 0.5131
## alternative hypothesis: stationary
#aTSA version: a battery of ADF statistics across three deterministic
#specifications (none / drift / drift+trend) and several lag lengths
aTSA::adf.test(macro$macropart) #null is nonstationary, so a significant result means stationary
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 -0.368 0.538
## [2,] 1 -0.321 0.551
## [3,] 2 -0.271 0.566
## [4,] 3 -0.294 0.559
## [5,] 4 -0.296 0.558
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 -2.25 0.232
## [2,] 1 -1.92 0.358
## [3,] 2 -1.67 0.457
## [4,] 3 -1.89 0.369
## [5,] 4 -2.09 0.293
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -2.49 0.369
## [2,] 1 -2.22 0.481
## [3,] 2 -2.03 0.562
## [4,] 3 -2.21 0.484
## [5,] 4 -2.39 0.412
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#same aTSA ADF battery for consumer sentiment
aTSA::adf.test(macro$consumer)
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 -0.636 0.451
## [2,] 1 -0.577 0.472
## [3,] 2 -0.516 0.494
## [4,] 3 -0.463 0.511
## [5,] 4 -0.481 0.505
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 -2.25 0.2329
## [2,] 1 -2.45 0.1519
## [3,] 2 -2.47 0.1452
## [4,] 3 -2.69 0.0823
## [5,] 4 -2.59 0.0993
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -2.60 0.3225
## [2,] 1 -2.93 0.1893
## [3,] 2 -3.04 0.1404
## [4,] 3 -3.41 0.0553
## [5,] 4 -3.30 0.0740
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#same aTSA ADF battery for presidential approval
aTSA::adf.test(macro$papp)
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 -1.033 0.308
## [2,] 1 -1.123 0.276
## [3,] 2 -1.075 0.294
## [4,] 3 -0.887 0.361
## [5,] 4 -0.983 0.327
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 -3.36 0.0156
## [2,] 1 -3.95 0.0100
## [3,] 2 -3.68 0.0100
## [4,] 3 -3.09 0.0317
## [5,] 4 -3.30 0.0183
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -3.65 0.0307
## [2,] 1 -4.25 0.0100
## [3,] 2 -3.98 0.0120
## [4,] 3 -3.44 0.0504
## [5,] 4 -3.63 0.0330
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test (Table 5.6)
#(aTSA version; note the null hypothesis here is stationarity)
aTSA::kpss.test(macro$macropart)
## KPSS Unit Root Test
## alternative: nonstationary
##
## Type 1: no drift no trend
## lag stat p.value
## 2 0.127 0.1
## -----
## Type 2: with drift no trend
## lag stat p.value
## 2 0.296 0.1
## -----
## Type 3: with drift and trend
## lag stat p.value
## 2 0.147 0.0488
## -----------
## Note: p.value = 0.01 means p.value <= 0.01
## : p.value = 0.10 means p.value >= 0.10
#null is stationary, so a significant result means nonstationary
#tseries version of the KPSS level-stationarity test
tseries::kpss.test(macro$macropart)
## Warning in tseries::kpss.test(macro$macropart): p-value smaller than
## printed p-value
##
## KPSS Test for Level Stationarity
##
## data: macro$macropart
## KPSS Level = 0.75467, Truncation lag parameter = 4, p-value = 0.01
#KPSS test for consumer sentiment
tseries::kpss.test(macro$consumer)
## Warning in tseries::kpss.test(macro$consumer): p-value smaller than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: macro$consumer
## KPSS Level = 1.4267, Truncation lag parameter = 4, p-value = 0.01
#KPSS test for presidential approval
tseries::kpss.test(macro$papp)
## Warning in tseries::kpss.test(macro$papp): p-value smaller than printed p-
## value
##
## KPSS Test for Level Stationarity
##
## data: macro$papp
## KPSS Level = 0.93357, Truncation lag parameter = 4, p-value = 0.01
#ARIMA models for macropartisanship, Table 5.7
#AR(1) in levels; the trailing ";mod.ar1" prints the fitted model
mod.ar1<-arima(macro$macropart,order=c(1,0,0));mod.ar1
##
## Call:
## arima(x = macro$macropart, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.9286 60.8512
## s.e. 0.0279 1.7692
##
## sigma^2 estimated as 2.946: log likelihood = -314.46, aic = 632.92
#pure random walk: first difference with no AR or MA terms
mod.i1<-arima(macro$macropart,order=c(0,1,0));mod.i1
##
## Call:
## arima(x = macro$macropart, order = c(0, 1, 0))
##
##
## sigma^2 estimated as 3.06: log likelihood = -314.52, aic = 629.04
#ARI(1,1): AR(1) on the first-differenced series
mod.ari11<-arima(macro$macropart,order=c(1,1,0));mod.ari11
##
## Call:
## arima(x = macro$macropart, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## -0.1450
## s.e. 0.0787
##
## sigma^2 estimated as 2.995: log likelihood = -312.84, aic = 627.68
#diagnose each
#residual ACF/PACF plus a 16-lag Ljung-Box test for the AR(1) fit
acf(mod.ar1$resid,16)
pacf(mod.ar1$resid,16)
Box.test(mod.ar1$resid,16,"Ljung-Box")
##
## Box-Ljung test
##
## data: mod.ar1$resid
## X-squared = 28.493, df = 16, p-value = 0.02759
#same residual diagnostics for the random-walk model
acf(mod.i1$resid,16)
pacf(mod.i1$resid,16)
Box.test(mod.i1$resid,16,"Ljung-Box")
##
## Box-Ljung test
##
## data: mod.i1$resid
## X-squared = 31.155, df = 16, p-value = 0.01286
#same residual diagnostics for the ARI(1,1) model
acf(mod.ari11$resid,16)
pacf(mod.ari11$resid,16)
Box.test(mod.ari11$resid,16,"Ljung-Box")
##
## Box-Ljung test
##
## data: mod.ari11$resid
## X-squared = 28.947, df = 16, p-value = 0.0243
#Just for fun. An alternative estimator of the ARI(1,1) model.
library(dyn)
#first difference of macropartisanship, padded with a leading NA so it
#lines up with the original 160 rows of macro
d.macropart<-ts(diff(macro$macropart))
macro$d.macropart<-c(NA,d.macropart)
head(macro)
## macropart repub democrat time qdate consumer papp d.macropart
## 1 60.5 31.57 46.87 1953q1 -28 94.73 69.83 NA
## 2 59.1 31.38 47.02 1953q2 -27 91.60 73.67 -1.4000015
## 3 57.8 33.07 45.30 1953q3 -26 87.80 73.67 -1.2999992
## 4 59.2 31.42 45.25 1953q4 -25 84.50 63.83 1.4000015
## 5 59.4 30.07 44.08 1954q1 -24 85.23 68.67 0.2000008
## 6 59.7 31.63 43.88 1954q2 -23 86.20 63.50 0.2999992
#OLS of the differenced series on its own first lag (dyn$lm handles the lag alignment)
ls.ari11<-dyn$lm(d.macropart~lag(d.macropart,-1));summary(ls.ari11)
##
## Call:
## lm(formula = dyn(d.macropart ~ lag(d.macropart, -1)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2063 -0.9269 0.0049 1.2141 3.8354
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02479 0.13875 -0.179 0.8584
## lag(d.macropart, -1) -0.14565 0.07936 -1.835 0.0684 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.744 on 156 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.02113, Adjusted R-squared: 0.01486
## F-statistic: 3.368 on 1 and 156 DF, p-value: 0.06838
#MONTE CARLO: INDEPENDENT RANDOM WALKS AND A COINTEGRATION CHECK#
rm(list=ls())
library(tseries)
#set obs
n<-500
#generate two disturbances
delta<-rnorm(n)
epsilon<-rnorm(n)
#create two independent random walks starting at zero:
#  x[1] = 0,  x[t] = x[t-1] + delta[t]   (likewise y with epsilon)
#Vectorized with cumsum instead of the original element-by-element loops;
#delta[1]/epsilon[1] are drawn but unused, exactly as before, so the RNG
#stream and the resulting series are unchanged.
x<-cumsum(c(0,delta[-1]))
y<-cumsum(c(0,epsilon[-1]))
#Are x & y integrated series? Remember, non-significant means integrated.
#(k=0 gives the simple Dickey-Fuller test with no augmenting lags)
adf.test(x, k=0)
##
## Augmented Dickey-Fuller Test
##
## data: x
## Dickey-Fuller = -2.4998, Lag order = 0, p-value = 0.3668
## alternative hypothesis: stationary
#same unit-root check for y
adf.test(y, k=0)
##
## Augmented Dickey-Fuller Test
##
## data: y
## Dickey-Fuller = -2.2377, Lag order = 0, p-value = 0.4777
## alternative hypothesis: stationary
#Are they cointegrated?
#Engle-Granger idea: regress y on x and test the residuals for a unit root.
#Since x and y are independent random walks, the residuals should be nonstationary.
mod.1 <-lm(y~x)
adf.test(mod.1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod.1$residuals
## Dickey-Fuller = -2.6999, Lag order = 7, p-value = 0.282
## alternative hypothesis: stationary
###################################################
#ERROR-CORRECTION MODELS: INDIA & PAKISTAN DEFENSE SPENDING#
rm(list=ls())
#load packages
library(foreign)
library(timeSeries)
library(tseries)
library(dyn)
library(aTSA)
options(scipen=12)
#load data and create lags
#(requires indipaki.dta: yearly defense spending for India and Pakistan)
ip<-read.dta("indipaki.dta")
#data cleaning: first differences, padded with NA to keep the original length
ip$d.pakds<-c(NA,diff(ip$pakds))
ip$d.indds<-c(NA,diff(ip$indds))
#restrict the sample to years before 1991
ip.1<-subset(ip,year<1991)
#plot the series (solid = India, dashed blue = Pakistan; y axis in thousands)
plot(y=ip.1$indds,x=ip.1$year,type='l',
xlab="Year",ylab="Spending (Millions of U.S. Dollars)",axes=F)
lines(y=ip.1$pakds,x=ip.1$year,lty=2,col='blue')
axis(1);axis(2, at=c(0,2000000,4000000,6000000,8000000,10000000),
labels=c(0,2000,4000,6000,8000,10000));box()
legend(x=1950,y=10000000,legend=c("India","Pakistan"),
lty=c(1,2),col=c("black","blue"))
####SUPPOSE WE BELIEVE THEORETICALLY THAT PAKISTAN'S SPENDING SHAPES INDIA'S IN A ONE-WAY RELATIONSHIP###
#Dickey-Fuller tests
aTSA::adf.test(ip.1$pakds) #null is nonstationary, so a significant result means stationary
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 3.35 0.99
## [2,] 1 3.59 0.99
## [3,] 2 3.37 0.99
## [4,] 3 2.89 0.99
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 1.79 0.99
## [2,] 1 2.21 0.99
## [3,] 2 2.25 0.99
## [4,] 3 2.07 0.99
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -0.622 0.969
## [2,] 1 -0.464 0.979
## [3,] 2 -0.420 0.981
## [4,] 3 -0.458 0.979
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#same ADF battery for India's spending
aTSA::adf.test(ip.1$indds)
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 3.97 0.99
## [2,] 1 3.70 0.99
## [3,] 2 4.99 0.99
## [4,] 3 3.34 0.99
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 2.27 0.99
## [2,] 1 2.40 0.99
## [3,] 2 3.73 0.99
## [4,] 3 2.69 0.99
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -0.368 0.983
## [2,] 1 -0.341 0.984
## [3,] 2 0.763 0.990
## [4,] 3 0.725 0.990
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#Step 1 of the Engle-Granger Two-Step estimation
#Regress India's spending on Pakistan's; the residuals estimate the
#equilibrium error (deviation from the long-run relationship).
equilibrium<-lm(indds~pakds,data=ip.1)
summary(equilibrium)
##
## Call:
## lm(formula = indds ~ pakds, data = ip.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2152591 -343570 -323 280093 1550765
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62240.4504 130591.2605 -0.477 0.636
## pakds 3.4040 0.1084 31.390 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 591200 on 41 degrees of freedom
## Multiple R-squared: 0.9601, Adjusted R-squared: 0.9591
## F-statistic: 985.3 on 1 and 41 DF, p-value: < 0.00000000000000022
#store the equilibrium errors for use as the error-correction term
ip.1$z<-equilibrium$residuals
head(ip.1)
## year pakds indds res1 res1b res2 d.pakds d.indds
## 1 1948 107414 275196 -132230.92 -285613.9 -22561.518 NA NA
## 2 1949 187161 503918 -113920.27 -305309.3 -7322.844 79747 228722
## 3 1950 199398 366073 -64657.13 -481273.4 43791.703 12237 -137845
## 4 1951 244578 383874 -24258.60 -604211.3 83951.141 45180 17801
## 5 1952 282563 390843 11854.47 -715568.2 119970.617 37985 6969
## 6 1953 246233 403943 -27994.28 -589297.8 79945.922 -36330 13100
## z
## 1 -28198.78
## 2 -70934.07
## 3 -250433.58
## 4 -386424.45
## 5 -508755.68
## 6 -371989.04
#Are the residuals stationary? (If so, the two series are cointegrated.)
aTSA::adf.test(ip.1$z) #null is nonstationary, so a significant result means stationary
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 -3.48 0.0100
## [2,] 1 -3.19 0.0100
## [3,] 2 -2.63 0.0101
## [4,] 3 -3.14 0.0100
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 -3.44 0.0180
## [2,] 1 -3.15 0.0341
## [3,] 2 -2.58 0.1134
## [4,] 3 -3.08 0.0398
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -3.44 0.0643
## [2,] 1 -3.15 0.1188
## [3,] 2 -2.56 0.3436
## [4,] 3 -3.03 0.1643
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#visual and autocorrelation diagnostics of the equilibrium errors
plot(y=ip.1$z,x=ip.1$year,type='l')
acf(ip.1$z,15)
pacf(ip.1$z,15)
#Step 2, Version A: zero lag model testing for error correction only
#(convert to ts so dyn can lag; lag(z,-1) is last year's equilibrium error)
ip.2<-ts(ip.1)
ind.0<-dyn$lm(d.indds~lag(z,-1),data=ip.2)
summary(ind.0)
##
## Call:
## lm(formula = dyn(d.indds ~ lag(z, -1)), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -879370 -257132 -90469 87393 1514427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 232582.2303 72495.7990 3.208 0.00263 **
## lag(z, -1) -0.2196 0.1244 -1.765 0.08523 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 469800 on 40 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.07224, Adjusted R-squared: 0.04905
## F-statistic: 3.115 on 1 and 40 DF, p-value: 0.08523
#Step 2, Version B: allowing for error correction and Granger causation
#(adds the contemporaneous change in Pakistan's spending as a regressor)
ind.1<-dyn$lm(d.indds~lag(z,-1)+d.pakds,data=ip.2)
summary(ind.1)
##
## Call:
## lm(formula = dyn(d.indds ~ lag(z, -1) + d.pakds), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -868554 -209884 -31850 135506 1185488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132814.1455 71220.4673 1.865 0.06974 .
## lag(z, -1) -0.3238 0.1153 -2.809 0.00774 **
## d.pakds 1.4856 0.4434 3.350 0.00180 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 419300 on 39 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2796, Adjusted R-squared: 0.2427
## F-statistic: 7.568 on 2 and 39 DF, p-value: 0.00167
#Engle-Granger Single Equation Estimation
#(one-step ECM: lagged levels of both series replace the two-step residual term)
ind.one.step<-dyn$lm(d.indds~lag(indds,-1)+lag(pakds,-1)+d.pakds,data=ip.2)
summary(ind.one.step)
##
## Call:
## lm(formula = dyn(d.indds ~ lag(indds, -1) + lag(pakds, -1) +
## d.pakds), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -969368 -152029 4804 116469 989229
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12089.0072 91148.0767 -0.133 0.89519
## lag(indds, -1) -0.2981 0.1112 -2.682 0.01078 *
## lag(pakds, -1) 1.1906 0.3785 3.146 0.00321 **
## d.pakds 1.2133 0.4441 2.732 0.00949 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 401800 on 38 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3553, Adjusted R-squared: 0.3044
## F-statistic: 6.981 on 3 and 38 DF, p-value: 0.0007445
####CONSIDERING THE TWO-WAY APPROACH FROM THE TEXT####
#Step 1 of the Engle-Granger Two-Step estimation
#Now normalize the long-run relationship on Pakistan's spending.
equilibrium.2<-lm(pakds~indds,data=ip.1)
#BUG FIX: the original script called summary(equilibrium) here -- the model
#from the one-way section -- so the transcript output below this line still
#shows the old indds ~ pakds fit, not equilibrium.2.
summary(equilibrium.2)
##
## Call:
## lm(formula = indds ~ pakds, data = ip.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2152591 -343570 -323 280093 1550765
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62240.4504 130591.2605 -0.477 0.636
## pakds 3.4040 0.1084 31.390 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 591200 on 41 degrees of freedom
## Multiple R-squared: 0.9601, Adjusted R-squared: 0.9591
## F-statistic: 985.3 on 1 and 41 DF, p-value: < 0.00000000000000022
#store the second set of equilibrium errors (normalized on Pakistan)
ip.1$z.2<-equilibrium.2$residuals
head(ip.1)
## year pakds indds res1 res1b res2 d.pakds d.indds
## 1 1948 107414 275196 -132230.92 -285613.9 -22561.518 NA NA
## 2 1949 187161 503918 -113920.27 -305309.3 -7322.844 79747 228722
## 3 1950 199398 366073 -64657.13 -481273.4 43791.703 12237 -137845
## 4 1951 244578 383874 -24258.60 -604211.3 83951.141 45180 17801
## 5 1952 282563 390843 11854.47 -715568.2 119970.617 37985 6969
## 6 1953 246233 403943 -27994.28 -589297.8 79945.922 -36330 13100
## z z.2
## 1 -28198.78 -22561.518
## 2 -70934.07 -7322.844
## 3 -250433.58 43791.702
## 4 -386424.45 83951.142
## 5 -508755.68 119970.618
## 6 -371989.04 79945.918
#Step 2, Version A: zero lag model testing for error correction only
#(rebuild ip.2 so it carries the new z.2 column)
ip.2<-ts(ip.1)
#India's equation: does last year's equilibrium error move India's spending?
ind.0<-dyn$lm(d.indds~lag(z.2,-1),data=ip.2);summary(ind.0)
##
## Call:
## lm(formula = dyn(d.indds ~ lag(z.2, -1)), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -859635 -251706 -110297 132744 1474713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 234129.7881 70851.3041 3.305 0.00201 **
## lag(z.2, -1) 0.9549 0.4214 2.266 0.02894 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 459200 on 40 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1138, Adjusted R-squared: 0.0916
## F-statistic: 5.134 on 1 and 40 DF, p-value: 0.02894
#Pakistan's equation: error-correction term only
pak.0<-dyn$lm(d.pakds~lag(z.2,-1),data=ip.2);summary(pak.0)
##
## Call:
## lm(formula = dyn(d.pakds ~ lag(z.2, -1)), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -311834 -77345 -12809 55785 520417
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66686.7339 23405.5185 2.849 0.00689 **
## lag(z.2, -1) -0.1925 0.1392 -1.383 0.17441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 151700 on 40 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.04562, Adjusted R-squared: 0.02176
## F-statistic: 1.912 on 1 and 40 DF, p-value: 0.1744
#Step 2, Version B: allowing for error correction and Granger causation
#India's equation with three lags of both differenced series plus the
#error-correction term
ind.3<-dyn$lm(d.indds~lag(d.indds,-1)+lag(d.indds,-2)+
lag(d.indds,-3)+lag(d.pakds,-1)+lag(d.pakds,-2)+
lag(d.pakds,-3)+lag(z.2,-1),data=ip.2);summary(ind.3)
##
## Call:
## lm(formula = dyn(d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) +
## lag(d.indds, -3) + lag(d.pakds, -1) + lag(d.pakds, -2) +
## lag(d.pakds, -3) + lag(z.2, -1)), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -839830 -215755 20116 242414 694300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 182495.24228 83729.27663 2.180 0.037006 *
## lag(d.indds, -1) 0.41021 0.17904 2.291 0.028912 *
## lag(d.indds, -2) -0.06479 0.16694 -0.388 0.700594
## lag(d.indds, -3) 0.70627 0.16550 4.268 0.000173 ***
## lag(d.pakds, -1) -0.48663 0.58371 -0.834 0.410842
## lag(d.pakds, -2) -0.35497 0.57279 -0.620 0.539969
## lag(d.pakds, -3) -2.01313 0.54257 -3.710 0.000811 ***
## lag(z.2, -1) 1.58074 0.52104 3.034 0.004854 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 386800 on 31 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.5024, Adjusted R-squared: 0.39
## F-statistic: 4.471 on 7 and 31 DF, p-value: 0.001547
#Pakistan's equation with the same lag structure
pak.3<-dyn$lm(d.pakds~lag(d.indds,-1)+lag(d.indds,-2)+
lag(d.indds,-3)+lag(d.pakds,-1)+lag(d.pakds,-2)+
lag(d.pakds,-3)+lag(z.2,-1),data=ip.2);summary(pak.3)
##
## Call:
## lm(formula = dyn(d.pakds ~ lag(d.indds, -1) + lag(d.indds, -2) +
## lag(d.indds, -3) + lag(d.pakds, -1) + lag(d.pakds, -2) +
## lag(d.pakds, -3) + lag(z.2, -1)), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -310088 -47086 -19697 40008 480445
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31854.27416 32052.04119 0.994 0.32800
## lag(d.indds, -1) 0.01674 0.06854 0.244 0.80867
## lag(d.indds, -2) -0.02616 0.06390 -0.409 0.68503
## lag(d.indds, -3) 0.18314 0.06335 2.891 0.00696 **
## lag(d.pakds, -1) -0.04804 0.22345 -0.215 0.83116
## lag(d.pakds, -2) 0.03401 0.21927 0.155 0.87775
## lag(d.pakds, -3) -0.06604 0.20770 -0.318 0.75266
## lag(z.2, -1) -0.15418 0.19946 -0.773 0.44539
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 148100 on 31 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.2924, Adjusted R-squared: 0.1326
## F-statistic: 1.83 on 7 and 31 DF, p-value: 0.1166
#Granger Causality tests
#restricted model for India: drop the d.pakds lags
ind.subset<-dyn$lm(d.indds~lag(d.indds,-1)+lag(d.indds,-2)+
lag(d.indds,-3)+lag(z.2,-1),data=ip.2)
summary(ind.subset)
##
## Call:
## lm(formula = dyn(d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) +
## lag(d.indds, -3) + lag(z.2, -1)), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -893993 -192768 -39498 196995 1319706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 152072.5982 94974.2444 1.601 0.1186
## lag(d.indds, -1) 0.1265 0.1570 0.806 0.4258
## lag(d.indds, -2) -0.1300 0.1639 -0.793 0.4332
## lag(d.indds, -3) 0.4166 0.1626 2.562 0.0150 *
## lag(z.2, -1) 1.0106 0.4216 2.397 0.0222 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 447700 on 34 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.2691, Adjusted R-squared: 0.1831
## F-statistic: 3.129 on 4 and 34 DF, p-value: 0.02703
#restricted model for Pakistan: drop the d.indds lags
pak.subset<-dyn$lm(d.pakds~lag(d.pakds,-1)+lag(d.pakds,-2)+
lag(d.pakds,-3)+lag(z.2,-1),data=ip.2)
summary(pak.subset)
##
## Call:
## lm(formula = dyn(d.pakds ~ lag(d.pakds, -1) + lag(d.pakds, -2) +
## lag(d.pakds, -3) + lag(z.2, -1)), data = ip.2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -336515 -67738 -20623 46143 465939
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51730.095149 33456.928080 1.546 0.131
## lag(d.pakds, -1) 0.006645 0.194366 0.034 0.973
## lag(d.pakds, -2) 0.106523 0.182850 0.583 0.564
## lag(d.pakds, -3) 0.159098 0.182964 0.870 0.391
## lag(z.2, -1) -0.224621 0.170159 -1.320 0.196
##
## Residual standard error: 161900 on 34 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.07282, Adjusted R-squared: -0.03626
## F-statistic: 0.6676 on 4 and 34 DF, p-value: 0.6189
#block F-test: do Pakistan's lags jointly help predict changes in India's spending?
anova(ind.subset,ind.3)
## Analysis of Variance Table
##
## Model 1: d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) + lag(d.indds,
## -3) + lag(z.2, -1)
## Model 2: d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) + lag(d.indds,
## -3) + lag(d.pakds, -1) + lag(d.pakds, -2) + lag(d.pakds,
## -3) + lag(z.2, -1)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 34 6813459188945
## 2 31 4638544609431 3 2174914579514 4.8451 0.007038 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#block F-test: do India's lags jointly help predict changes in Pakistan's spending?
anova(pak.subset,pak.3)
## Analysis of Variance Table
##
## Model 1: d.pakds ~ lag(d.pakds, -1) + lag(d.pakds, -2) + lag(d.pakds,
## -3) + lag(z.2, -1)
## Model 2: d.pakds ~ lag(d.indds, -1) + lag(d.indds, -2) + lag(d.indds,
## -3) + lag(d.pakds, -1) + lag(d.pakds, -2) + lag(d.pakds,
## -3) + lag(z.2, -1)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 34 890666905515
## 2 31 679733139150 3 210933766366 3.2066 0.03659 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
######################################################################
#ARCH/GARCH MODELS#
#front matter
rm(list=ls())
#install.packages("fGarch")
#install.packages("tseries")
library(fGarch)
## Loading required package: fBasics
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:car':
##
## densityPlot
library(tseries)
#Monte Carlo example: generate and fit an ARCH(2) model
n <- 1100
a <- c(0.1, 0.5, 0.2) # ARCH(2) coefficients
e <- rnorm(n)
x <- double(n)
#initialize the first two values with draws at the unconditional
#standard deviation sqrt(a1/(1 - a2 - a3))
x[1:2] <- rnorm(2, sd = sqrt(a[1]/(1.0-a[2]-a[3])))
for(i in 3:n) # Generate ARCH(2) process
{
x[i] <- e[i]*sqrt(a[1]+a[2]*x[i-1]^2+a[3]*x[i-2]^2)
}
#drop the first 100 observations as burn-in
x <- ts(x[101:1100])
x.arch <- garchFit(formula=~garch(2,0),
include.mean=FALSE,
data=x, trace=FALSE) # Fit ARCH(2)
### omega is the constant in "h", alpha refers to MA terms, beta refers to AR terms
### (i.e., alpha multiplies lagged squared shocks -- the ARCH terms -- and
### beta would multiply lagged conditional variances -- the GARCH terms)
### NOTE DIFFERENCES FROM "garch" IN "tseries" ###
summary(x.arch)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(2, 0), data = x, include.mean = FALSE,
## trace = FALSE)
##
## Mean and Variance Equation:
## data ~ garch(2, 0)
## <environment: 0x7fb3a8097fd0>
## [data = x]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## omega alpha1 alpha2
## 0.10373 0.50385 0.21025
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## omega 0.10373 0.01003 10.337 < 2e-16 ***
## alpha1 0.50385 0.06440 7.824 0.00000000000000511 ***
## alpha2 0.21025 0.04748 4.428 0.00000950189249793 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -713.0747 normalized: -0.7130747
##
## Description:
## Fri Jan 4 21:09:14 2019 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 1.903581 0.3860491
## Shapiro-Wilk Test R W 0.9977182 0.183673
## Ljung-Box Test R Q(10) 8.357049 0.5940044
## Ljung-Box Test R Q(15) 12.57897 0.634782
## Ljung-Box Test R Q(20) 14.31629 0.8141042
## Ljung-Box Test R^2 Q(10) 11.40563 0.3268006
## Ljung-Box Test R^2 Q(15) 16.7809 0.3321302
## Ljung-Box Test R^2 Q(20) 20.47191 0.4287787
## LM Arch Test R TR^2 10.24339 0.5946172
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 1.432149 1.446873 1.432131 1.437745
#plot(x.arch)
#post-diagnosis: plot 11 (ACF of squared residuals) is a good choice to see if we filtered heteroscedasticity
#####Working with real data on EU stock markets#####
data(EuStockMarkets)
###Start with diagnosis###
plot(EuStockMarkets[,"DAX"], type='l')
#more variable at higher values, but first trending
plot(diff(EuStockMarkets)[,"DAX"], type='l')
#still more variable at higher values, so take the difference of the log
plot(diff(log(EuStockMarkets))[,"DAX"], type='l')
#still heteroscedastic
#sizing-up potential heteroscedasticity
#(squared daily log returns proxy the variance process)
squares<-diff(log(EuStockMarkets))[,"DAX"]^2
#ACF/PACF/Box-Test--They're valid for variances too!
acf(squares, 20)
pacf(squares, 20)
Box.test(squares, 20, 'Ljung-Box')
##
## Box-Ljung test
##
## data: squares
## X-squared = 137.24, df = 20, p-value < 0.00000000000000022
#Here's a model with no ARMA process, and ARCH(2)
#(unlike the Monte Carlo fit above, the mean "mu" is estimated here)
dax.garch.1 <- garchFit(formula=~garch(2,0),
data=diff(log(EuStockMarkets))[,"DAX"],
trace=FALSE)
summary(dax.garch.1)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(2, 0), data = diff(log(EuStockMarkets))[,
## "DAX"], trace = FALSE)
##
## Mean and Variance Equation:
## data ~ garch(2, 0)
## <environment: 0x7fb38e30a4e8>
## [data = diff(log(EuStockMarkets))[, "DAX"]]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 alpha2
## 0.00067795 0.00008684 0.08637820 0.09013990
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.000677948 0.000231896 2.924 0.003461 **
## omega 0.000086840 0.000003891 22.316 < 2e-16 ***
## alpha1 0.086378202 0.024305122 3.554 0.000380 ***
## alpha2 0.090139903 0.026104314 3.453 0.000554 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 5900.61 normalized: 3.174077
##
## Description:
## Fri Jan 4 21:09:14 2019 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 4780.743 0
## Shapiro-Wilk Test R W 0.9533134 0
## Ljung-Box Test R Q(10) 5.018164 0.8899613
## Ljung-Box Test R Q(15) 13.69537 0.5487467
## Ljung-Box Test R Q(20) 18.04834 0.5842234
## Ljung-Box Test R^2 Q(10) 14.8518 0.137559
## Ljung-Box Test R^2 Q(15) 22.74342 0.0897415
## Ljung-Box Test R^2 Q(20) 25.3464 0.1884986
## LM Arch Test R TR^2 14.05836 0.2969968
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -6.343851 -6.331957 -6.343861 -6.339468
#plot(dax.garch.1)
#It's a winner!
#Here's a model with no ARMA process, and GARCH(1,1)
dax.garch.2 <- garchFit(formula=~garch(1,1),
data=diff(log(EuStockMarkets))[,"DAX"],
trace=FALSE)
summary(dax.garch.2)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(1, 1), data = diff(log(EuStockMarkets))[,
## "DAX"], trace = FALSE)
##
## Mean and Variance Equation:
## data ~ garch(1, 1)
## <environment: 0x7fb3a0b61420>
## [data = diff(log(EuStockMarkets))[, "DAX"]]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 beta1
## 0.0006535079 0.0000047543 0.0684165305 0.8876111521
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.000653508 0.000215758 3.029 0.00245 **
## omega 0.000004754 0.000001264 3.760 0.00017 ***
## alpha1 0.068416531 0.014777055 4.630 0.00000366 ***
## beta1 0.887611152 0.023558551 37.677 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 5966.214 normalized: 3.209368
##
## Description:
## Fri Jan 4 21:09:14 2019 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 13380.69 0
## Shapiro-Wilk Test R W 0.9477474 0
## Ljung-Box Test R Q(10) 3.195816 0.9764329
## Ljung-Box Test R Q(15) 10.13427 0.8112099
## Ljung-Box Test R Q(20) 12.80196 0.8857182
## Ljung-Box Test R^2 Q(10) 0.8932651 0.9998977
## Ljung-Box Test R^2 Q(15) 1.329651 0.9999981
## Ljung-Box Test R^2 Q(20) 1.756904 1
## LM Arch Test R TR^2 1.08588 0.9999776
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -6.414432 -6.402538 -6.414441 -6.410049
#plot(dax.garch.2)
#This one rocks our world more.
#Here's a model with an ARMA(1,1) process, and GARCH(1,1)
dax.garch.3 <- garchFit(formula=~arma(1,1)+garch(1,1),
data=diff(log(EuStockMarkets))[,"DAX"],
trace=FALSE)
#captured warning below presumably comes from the internal arima() call
#used to get ARMA starting values -- harmless here, but worth noting
## Warning in arima(.series$x, order = c(u, 0, v), include.mean =
## include.mean): possible convergence problem: optim gave code = 1
summary(dax.garch.3)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(1, 1) + garch(1, 1), data = diff(log(EuStockMarkets))[,
## "DAX"], trace = FALSE)
##
## Mean and Variance Equation:
## data ~ arma(1, 1) + garch(1, 1)
## <environment: 0x7fb3a902c920>
## [data = diff(log(EuStockMarkets))[, "DAX"]]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 ma1 omega alpha1
## 0.0006113149 0.0721959825 -0.0568881669 0.0000049191 0.0705854371
## beta1
## 0.8840313184
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.000611315 0.000455023 1.343 0.179
## ar1 0.072195983 0.622786398 0.116 0.908
## ma1 -0.056888167 0.633345760 -0.090 0.928
## omega 0.000004919 0.000001216 4.044 0.00005256 ***
## alpha1 0.070585437 0.014465626 4.880 0.00000106 ***
## beta1 0.884031318 0.022523290 39.250 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 5966.945 normalized: 3.209761
##
## Description:
## Fri Jan 4 21:09:14 2019 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 13311.37 0
## Shapiro-Wilk Test R W 0.9480408 0
## Ljung-Box Test R Q(10) 2.928107 0.9830656
## Ljung-Box Test R Q(15) 9.632712 0.8421855
## Ljung-Box Test R Q(20) 12.39132 0.9019491
## Ljung-Box Test R^2 Q(10) 0.893068 0.9998978
## Ljung-Box Test R^2 Q(15) 1.332901 0.9999981
## Ljung-Box Test R^2 Q(20) 1.756903 1
## LM Arch Test R TR^2 1.089638 0.9999772
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -6.413066 -6.395225 -6.413087 -6.406491
#plot(dax.garch.3)
#AR and MA are superfluous.
#Make a plot selection (or 0 to exit):
#1: Time Series
# 2: Conditional SD
# 3: Series with 2 Conditional SD Superimposed
# 4: ACF of Observations
# 5: ACF of Squared Observations
# 6: Cross Correlation
# 7: Residuals
# 8: Conditional SDs
# 9: Standardized Residuals
#10: ACF of Standardized Residuals
#11: ACF of Squared Standardized Residuals
#12: Cross Correlation between r^2 and r
#13: QQ-Plot of Standardized Residuals
#https://spia.uga.edu/faculty_pages/monogan/teaching/pd/
# cleanup
rm(list=ls())
library(lattice)
# load data: tab-separated state-level panel of divorce rates
divorce <- read.table("Divorce.txt", sep ="\t", quote = "",header=TRUE)
#divorce<-read.table("//spia.uga.edu/faculty_pages/monogan/teaching/pd/Divorce.txt",
# sep ="\t", quote = "",header=TRUE)
# Create a new variable for year: TIME 1-4 maps to 1965, 1975, 1985, 1995
# (use <- for assignment, consistent with the rest of the file)
divorce$YEAR<-divorce$TIME*10+1955
#quick overview of data
head(divorce)
## DIVORCE BIRTH MARRIAGE UNEMPLOY CRIME AFDC STATE TIME STATE.Name
## 1 2.6 19.9 8.8 4.9 6.799 114 1 1 Maine
## 2 2.3 19.5 13.4 2.8 6.106 188 2 1 New Hampshire
## 3 1.5 20.5 9.0 4.2 5.793 113 3 1 Vermont
## 4 1.5 18.8 7.1 4.9 15.072 188 4 1 Massachusetts
## 5 1.3 19.4 7.1 4.9 14.180 172 5 1 Rhode Island
## 6 1.3 19.2 7.4 3.9 11.749 197 6 1 Connecticut
## Region YEAR
## 1 New England 1965
## 2 New England 1965
## 3 New England 1965
## 4 New England 1965
## 5 New England 1965
## 6 New England 1965
#variable-by-variable five-number summaries, plus NA counts
summary(divorce)
## DIVORCE BIRTH MARRIAGE UNEMPLOY
## Min. :0.500 Min. :11.20 Min. : 6.100 Min. : 2.300
## 1st Qu.:3.300 1st Qu.:14.30 1st Qu.: 7.975 1st Qu.: 4.700
## Median :4.250 Median :15.70 Median : 9.200 Median : 5.900
## Mean :4.361 Mean :16.38 Mean :10.175 Mean : 6.339
## 3rd Qu.:5.300 3rd Qu.:18.50 3rd Qu.:11.000 3rd Qu.: 8.000
## Max. :9.100 Max. :27.90 Max. :88.100 Max. :13.000
## NA's :12 NA's :3 NA's :4 NA's :3
## CRIME AFDC STATE TIME
## Min. : 2.458 Min. : 33.0 Min. : 1 Min. :1.00
## 1st Qu.: 6.575 1st Qu.:154.0 1st Qu.:13 1st Qu.:1.75
## Median :22.950 Median :224.0 Median :26 Median :2.50
## Mean :28.779 Mean :245.9 Mean :26 Mean :2.50
## 3rd Qu.:47.095 3rd Qu.:315.0 3rd Qu.:39 3rd Qu.:3.25
## Max. :83.421 Max. :731.0 Max. :51 Max. :4.00
## NA's :4 NA's :3
## STATE.Name Region YEAR
## Alabama : 4 South Atlantic :36 Min. :1965
## Alaska : 4 Mountain :32 1st Qu.:1972
## Arizona : 4 West North Central:28 Median :1980
## Arkansas : 4 New England :24 Mean :1980
## California: 4 East North Central:20 3rd Qu.:1988
## Colorado : 4 Pacific :20 Max. :1995
## (Other) :180 (Other) :44
# Individual-Level Time Plot Using "lattice" Graphics
#(uncomment the trellis.device/dev.off pair to write a PNG file instead)
#trellis.device("png",color=FALSE,file="stateDivorce.png")
#one line per state, distinguished via groups=STATE
xyplot(DIVORCE~YEAR, data=divorce, type='l',
groups=STATE, xlab="Year", ylab="Divorce Rate")
#dev.off()
#regional subsets of individual time plot
xyplot(DIVORCE~YEAR, data=divorce, type='l',
groups=STATE, xlab="Year", ylab="Divorce Rate",subset=Region=="New England")
xyplot(DIVORCE~YEAR, data=divorce, type='l',
groups=STATE, xlab="Year", ylab="Divorce Rate",subset=Region=="South Atlantic")
# Time Plot of Means
#listwise-delete missing rows, then average DIVORCE within each year
divorce.2 <- na.omit(divorce)
div.mean <- by(divorce.2$DIVORCE, divorce.2$YEAR, mean)
#the four observed panel waves
years <- c(1965,1975,1985,1995)
plot(div.mean ~ years, type='o')
# Box Plot
#distribution of state divorce rates within each year
boxplot(DIVORCE~YEAR, data=divorce)
#####ALTERNATE CODE#####
# Individual-Level Time Plot Using "base" Graphics
#scatter all state-years, then overlay one trajectory line per state
plot(DIVORCE ~ YEAR, data = divorce)
#loop over each state once; the original iterated over divorce$STATE,
#which repeats every state ID once per year, so each state's line was
#redrawn several times
for (i in unique(divorce$STATE)) {
lines(DIVORCE ~ YEAR, data = subset(divorce, STATE == i), col='gray60') }
# Individual-Level Time Plot for New England v. South
#stack the two regional panels vertically
par(mfrow=c(2,1))
plot(DIVORCE ~ YEAR,
data = subset(divorce, Region=="New England"),
main="New England")
#unique() avoids redrawing each state's line once per year; subsetting
#on Region yields zero rows for states outside the region, which draw
#nothing
for (i in unique(divorce$STATE)) {
lines(DIVORCE ~ YEAR,
data = subset(divorce, STATE == i & Region=="New England"),
col='gray60')
}
plot(DIVORCE ~ YEAR,
data = subset(divorce, Region=="South Atlantic"),
main="South Atlantic")
for (i in unique(divorce$STATE)) {
lines(DIVORCE ~ YEAR,
data = subset(divorce, STATE == i & Region=="South Atlantic"),
col='gray60')
}
#####################################################################
# Illustration on variance components
# Two units share a common functional form but differ in intercept and
# slope; noise is then layered on in two stages (natural variation,
# then measurement error) to show where panel variance comes from.
#the four time points
wave <- 1:4
#deterministic trends, notice variation between individuals
trend.a <- 1 + 0.5*wave
trend.b <- 2 + 0.25*wave
plot(y=trend.a, x=wave, type='l', ylim=c(0,5))
lines(y=trend.b, x=wave, lty=2)
#plus natural variation: small Gaussian shocks around each trend
cycle.a <- trend.a + rnorm(4, sd=0.1)
cycle.b <- trend.b + rnorm(4, sd=0.1)
plot(y=cycle.a, x=wave, type='l', ylim=c(0,5))
lines(y=cycle.b, x=wave, lty=2)
#plus measurement error: a second, independent noise layer
observed.a <- cycle.a + rnorm(4, sd=0.1)
observed.b <- cycle.b + rnorm(4, sd=0.1)
plot(y=observed.a, x=wave, type='l', ylim=c(0,5))
lines(y=observed.b, x=wave, lty=2)
#all three layers in one frame: lines = trends, filled dots = cycles,
#open dots = observed values
plot(y=trend.a, x=wave, type='l', ylim=c(0,5), xlab="x", ylab="y", col='red')
lines(y=trend.b, x=wave, lty=2, col='blue')
points(y=cycle.a, x=wave, pch=20, col='red')
points(y=cycle.b, x=wave, pch=20, col='blue')
points(y=observed.a, x=wave, col='red')
points(y=observed.b, x=wave, col='blue')
#####################################################################
#clean up
rm(list=ls())
#required packages
library(foreign)
##SECTION 2.5: MERGING AND RESHAPING DATA##
#load 1994 and 1995 data in CSV format
#(presumably Political Terror Scale data: Amnesty and State Dept.
#scores by country -- see the variable names in the output below)
#hmnrghts.94<-read.csv("http://j.mp/PTS1994")
#hmnrghts.95<-read.csv("http://j.mp/PTS1995")
hmnrghts.94<-read.csv("pts1994.csv")
hmnrghts.95<-read.csv("pts1995.csv")
#view the top of each data set
head(hmnrghts.94)
## Country COWAlpha COW WorldBank Amnesty.1994 StateDept.1994
## 1 United States USA 2 USA 1 NA
## 2 Canada CAN 20 CAN 1 1
## 3 Bahamas BHM 31 BHS 1 2
## 4 Cuba CUB 40 CUB 3 3
## 5 Haiti HAI 41 HTI 5 4
## 6 Dominican Republic DOM 42 DOM 2 2
#first six rows of the 1995 data
head(hmnrghts.95)
## Country COWAlpha COW WorldBank Amnesty.1995 StateDept.1995
## 1 United States USA 2 USA 1 NA
## 2 Canada CAN 20 CAN NA 1
## 3 Bahamas BHM 31 BHS 1 1
## 4 Cuba CUB 40 CUB 4 3
## 5 Haiti HAI 41 HTI 2 3
## 6 Dominican Republic DOM 42 DOM 2 2
#subset 1995 data to necessary variables only
#(keep just the key and the 1995 scores so the merge adds no duplicates)
hmnrghts.95<-subset(hmnrghts.95,select=c(COW,Amnesty.1995,StateDept.1995))
#merge 1994 and 1995 data
#inner join on the COW numeric country code
hmnrghts.wide<-merge(x=hmnrghts.94,y=hmnrghts.95,by=c("COW"))
#view merged data
head(hmnrghts.wide)
## COW Country COWAlpha WorldBank Amnesty.1994 StateDept.1994
## 1 2 United States USA USA 1 NA
## 2 20 Canada CAN CAN 1 1
## 3 31 Bahamas BHM BHS 1 2
## 4 40 Cuba CUB CUB 3 3
## 5 41 Haiti HAI HTI 5 4
## 6 42 Dominican Republic DOM DOM 2 2
## Amnesty.1995 StateDept.1995
## 1 1 NA
## 2 NA 1
## 3 1 1
## 4 4 3
## 5 2 3
## 6 2 2
#number of observations and variables of 1994, 1995, and merged data
#(identical row counts confirm a one-to-one merge with no dropped cases)
dim(hmnrghts.94); dim(hmnrghts.95); dim(hmnrghts.wide)
## [1] 179 6
## [1] 179 3
## [1] 179 8
#reshape merged data into long format
#sep="." splits names like "Amnesty.1994" into a stub and a year value
hmnrghts.long<-reshape(hmnrghts.wide,
varying=c("Amnesty.1994","StateDept.1994","Amnesty.1995","StateDept.1995"),
timevar="year",idvar="COW",direction="long",sep=".")
#view the top of the long data, then the first few 1995 observations
head(hmnrghts.long)
## COW Country COWAlpha WorldBank year Amnesty StateDept
## 2.1994 2 United States USA USA 1994 1 NA
## 20.1994 20 Canada CAN CAN 1994 1 1
## 31.1994 31 Bahamas BHM BHS 1994 1 2
## 40.1994 40 Cuba CUB CUB 1994 3 3
## 41.1994 41 Haiti HAI HTI 1994 5 4
## 42.1994 42 Dominican Republic DOM DOM 1994 2 2
#the 1995 rows of the long data
head(hmnrghts.long[hmnrghts.long$year==1995,])
## COW Country COWAlpha WorldBank year Amnesty StateDept
## 2.1995 2 United States USA USA 1995 1 NA
## 20.1995 20 Canada CAN CAN 1995 NA 1
## 31.1995 31 Bahamas BHM BHS 1995 1 1
## 40.1995 40 Cuba CUB CUB 1995 4 3
## 41.1995 41 Haiti HAI HTI 1995 2 3
## 42.1995 42 Dominican Republic DOM DOM 1995 2 2
#reshape long data into wide form
#round trip back to one row per country, with columns suffixed by year
hmnrghts.wide.2<-reshape(hmnrghts.long,
v.names=c("Amnesty","StateDept"),
timevar="year",idvar="COW",direction="wide",sep=".")
#view top of new wide data
head(hmnrghts.wide.2)
## COW Country COWAlpha WorldBank Amnesty.1994
## 2.1994 2 United States USA USA 1
## 20.1994 20 Canada CAN CAN 1
## 31.1994 31 Bahamas BHM BHS 1
## 40.1994 40 Cuba CUB CUB 3
## 41.1994 41 Haiti HAI HTI 5
## 42.1994 42 Dominican Republic DOM DOM 2
## StateDept.1994 Amnesty.1995 StateDept.1995
## 2.1994 NA 1 NA
## 20.1994 1 NA 1
## 31.1994 2 1 1
## 40.1994 3 4 3
## 41.1994 4 2 3
## 42.1994 2 2 2
##MORE MERGING##
#more data
#hmnrghts.93 <- read.dta("http://j.mp/PTKstata")
#read Stata-format data with foreign::read.dta
hmnrghts.93 <- read.dta("hmnrghts.dta")
#rename columns so year-specific variables carry a .93 tag
colnames(hmnrghts.93)<-c('country','polity.93','StateDept.1993',
'military.93','gnpcats.93','lpop.93',
'civ_war.93','int_war.93')
#new merge, and some problems
#lower-case country names to improve matching on the name key
hmnrghts.wide$Country<-tolower(hmnrghts.wide$Country)
#left join keeping all rows of hmnrghts.wide; spell out TRUE rather
#than the reassignable shorthand T
test.1<-merge(x=hmnrghts.wide,y=hmnrghts.93,by.x="Country",by.y="country",all.x=TRUE)
#full outer join: keep unmatched rows from both sides
test.2<-merge(x=hmnrghts.wide,y=hmnrghts.93,by.x="Country",by.y="country",all=TRUE)
#compare row/column counts to see how many cases matched
dim(hmnrghts.wide);dim(hmnrghts.93);dim(test.1);dim(test.2)
## [1] 179 8
## [1] 158 8
## [1] 179 15
## [1] 200 15
#note the NA counts introduced by imperfect country-name matching
summary(test.1)
## Country COW COWAlpha WorldBank
## Length:179 Min. : 2.0 : 2 : 3
## Class :character 1st Qu.:313.0 ISR : 2 ISR* : 2
## Mode :character Median :461.0 AFG : 1 AFG : 1
## Mean :463.7 ALB : 1 AGO : 1
## 3rd Qu.:666.0 ALG : 1 ALB : 1
## Max. :990.0 ANG : 1 ARE : 1
## (Other):171 (Other):170
## Amnesty.1994 StateDept.1994 Amnesty.1995 StateDept.1995
## Min. :1.0 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.0 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :3.0 Median :2.000 Median :2.000 Median :2.000
## Mean :2.8 Mean :2.574 Mean :2.712 Mean :2.438
## 3rd Qu.:4.0 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :5.0 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :29 NA's :3 NA's :33 NA's :3
## polity.93 StateDept.1993 military.93 gnpcats.93
## Min. : 0.000 Min. :1.00 Min. :0.000 Length:179
## 1st Qu.: 0.000 1st Qu.:1.00 1st Qu.:0.000 Class :character
## Median : 6.000 Median :2.00 Median :0.000 Mode :character
## Mean : 5.239 Mean :2.46 Mean :0.146
## 3rd Qu.:10.000 3rd Qu.:3.00 3rd Qu.:0.000
## Max. :10.000 Max. :5.00 Max. :1.000
## NA's :62 NA's :42 NA's :42
## lpop.93 civ_war.93 int_war.93
## Min. :11.29 Min. :0.00000 Min. :0.0000
## 1st Qu.:14.60 1st Qu.:0.00000 1st Qu.:0.0000
## Median :15.86 Median :0.00000 Median :0.0000
## Mean :15.63 Mean :0.08029 Mean :0.0292
## 3rd Qu.:16.83 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :20.89 Max. :1.00000 Max. :1.0000
## NA's :42 NA's :42 NA's :42
#Differences? Can they be fixed?
#reshape
#only the StateDept variables vary by year here; the Amnesty.199x
#columns remain as time-constant (wide) variables in the long data
hmnrghts.three.long<-reshape(test.1,
varying=c("StateDept.1993","StateDept.1994","StateDept.1995"),
timevar="year",idvar="COW",direction="long",sep=".")
head(hmnrghts.three.long)
## Country COW COWAlpha WorldBank Amnesty.1994 Amnesty.1995
## 700.1993 afghanistan 700 AFG AFG 5 5
## 339.1993 albania 339 ALB ALB 3 3
## 615.1993 algeria 615 ALG DZA 5 5
## 540.1993 angola 540 ANG AGO 5 4
## 160.1993 argentina 160 ARG ARG 2 2
## 371.1993 armenia 371 ARM ARM 2 2
## polity.93 military.93 gnpcats.93 lpop.93 civ_war.93 int_war.93
## 700.1993 NA NA <NA> NA NA NA
## 339.1993 8 0 <1000 15.04 0 0
## 615.1993 0 0 1000-1999 17.12 0 0
## 540.1993 0 0 <1000 16.20 1 0
## 160.1993 8 0 >4000 17.33 0 0
## 371.1993 NA NA <NA> NA NA NA
## year StateDept
## 700.1993 1993 NA
## 339.1993 1993 2
## 615.1993 1993 5
## 540.1993 1993 5
## 160.1993 1993 2
## 371.1993 1993 NA
##QUICK DESCRIPTIVE STATS##
#537 country-year rows = 179 countries x 3 years (see output below)
summary(hmnrghts.three.long)
## Country COW COWAlpha WorldBank
## Length:537 Min. : 2.0 : 6 : 9
## Class :character 1st Qu.:310.0 ISR : 6 ISR* : 6
## Mode :character Median :461.0 AFG : 3 AFG : 3
## Mean :463.7 ALB : 3 AGO : 3
## 3rd Qu.:666.0 ALG : 3 ALB : 3
## Max. :990.0 ANG : 3 ARE : 3
## (Other):513 (Other):510
## Amnesty.1994 Amnesty.1995 polity.93 military.93
## Min. :1.0 Min. :1.000 Min. : 0.000 Min. :0.000
## 1st Qu.:2.0 1st Qu.:2.000 1st Qu.: 0.000 1st Qu.:0.000
## Median :3.0 Median :2.000 Median : 6.000 Median :0.000
## Mean :2.8 Mean :2.712 Mean : 5.239 Mean :0.146
## 3rd Qu.:4.0 3rd Qu.:4.000 3rd Qu.:10.000 3rd Qu.:0.000
## Max. :5.0 Max. :5.000 Max. :10.000 Max. :1.000
## NA's :87 NA's :99 NA's :186 NA's :126
## gnpcats.93 lpop.93 civ_war.93 int_war.93
## Length:537 Min. :11.29 Min. :0.00000 Min. :0.0000
## Class :character 1st Qu.:14.60 1st Qu.:0.00000 1st Qu.:0.0000
## Mode :character Median :15.86 Median :0.00000 Median :0.0000
## Mean :15.63 Mean :0.08029 Mean :0.0292
## 3rd Qu.:16.83 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :20.89 Max. :1.00000 Max. :1.0000
## NA's :126 NA's :126 NA's :126
## year StateDept
## Min. :1993 Min. :1.000
## 1st Qu.:1993 1st Qu.:1.000
## Median :1994 Median :2.000
## Mean :1994 Mean :2.493
## 3rd Qu.:1995 3rd Qu.:3.000
## Max. :1995 Max. :5.000
## NA's :48
##BOXPLOTS##
#side-by-side panels: State Dept. scores over time, split by regime type
par(mfrow=c(1,2))
boxplot(StateDept~year,data=hmnrghts.three.long,
subset=polity.93>=6,ylim=c(1,5),main="More Democratic")
boxplot(StateDept~year,data=hmnrghts.three.long,
subset=polity.93<6,ylim=c(1,5),main="Less Democratic")
#simple pooled model
#time counts years since 1993; dummy = 1 if 1993 polity score >= 6
hmnrghts.three.long$time<-hmnrghts.three.long$year-1993
hmnrghts.three.long$dummy<-as.numeric(hmnrghts.three.long$polity.93>=6)
#pooled OLS with a time-by-democracy interaction (ignores the panel
#structure of the data)
pooled.mod<-lm(StateDept~time*dummy,data=hmnrghts.three.long)
summary(pooled.mod)
##
## Call:
## lm(formula = StateDept ~ time * dummy, data = hmnrghts.three.long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2288 -1.0409 -0.0488 0.8889 2.9671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.22876 0.14418 22.394 < 2e-16 ***
## time -0.05882 0.11168 -0.527 0.599
## dummy -1.19585 0.19207 -6.226 0.00000000139 ***
## time:dummy 0.06677 0.14894 0.448 0.654
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.128 on 345 degrees of freedom
## (188 observations deleted due to missingness)
## Multiple R-squared: 0.2003, Adjusted R-squared: 0.1933
## F-statistic: 28.8 on 3 and 345 DF, p-value: < 0.00000000000000022
#####################################################################
#Load libraries
rm(list=ls())
library(nlme)
library(car)
###DATA MANAGEMENT###
#load data (TLC trial: repeated blood lead measurements per child)
#tlc<-read.table(file.choose(), header=TRUE, sep="")
tlc<-read.table("tlc.txt", header=TRUE, sep="")
#reshape data from wide (w0,w1,w4,w6) to long, one row per id-week
m.tlc<-reshape(tlc, varying=c("w0","w1","w4","w6"), idvar="id", timevar="week",direction="long",sep="")
#relevel treatment so that Placebo is the reference; wrap in factor()
#because since R 4.0 read.table no longer converts strings to factors,
#and relevel() errors on a character vector
m.tlc$treat<-relevel(factor(m.tlc$treat),ref="P")
#rename our dependent variable of blood lead level from "w" to "value"
colnames(m.tlc)[4]<-"value"
###RESPONSE PROFILES###
#Just for the heck of it: OLS
#saturated treatment-by-week cell-means model, estimated by OLS
#(standard errors ignore within-subject correlation)
ols.profiles<-lm(value~as.factor(treat)+as.factor(week)+
as.factor(treat)*as.factor(week), data=m.tlc)
summary(ols.profiles)
##
## Call:
## lm(formula = value ~ as.factor(treat) + as.factor(week) + as.factor(treat) *
## as.factor(week), data = m.tlc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.662 -4.620 -0.993 3.672 43.138
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 26.272 0.937 28.038
## as.factor(treat)A 0.268 1.325 0.202
## as.factor(week)1 -1.612 1.325 -1.216
## as.factor(week)4 -2.202 1.325 -1.662
## as.factor(week)6 -2.626 1.325 -1.982
## as.factor(treat)A:as.factor(week)1 -11.406 1.874 -6.086
## as.factor(treat)A:as.factor(week)4 -8.824 1.874 -4.709
## as.factor(treat)A:as.factor(week)6 -3.152 1.874 -1.682
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## as.factor(treat)A 0.8398
## as.factor(week)1 0.2245
## as.factor(week)4 0.0974 .
## as.factor(week)6 0.0482 *
## as.factor(treat)A:as.factor(week)1 0.00000000275 ***
## as.factor(treat)A:as.factor(week)4 0.00000346729 ***
## as.factor(treat)A:as.factor(week)6 0.0934 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.626 on 392 degrees of freedom
## Multiple R-squared: 0.3284, Adjusted R-squared: 0.3164
## F-statistic: 27.38 on 7 and 392 DF, p-value: < 0.00000000000000022
#Now the real model: GLS with REML (note: you need 'nlme' here)
# This function fits a linear model using generalized least squares. The errors are allowed to be correlated and/or have unequal variances.
#corSymm(form=~1|id) puts an unstructured (general) correlation matrix
#on the repeated observations within each subject
gls.profiles<-nlme::gls(value~as.factor(treat)+as.factor(week)+
as.factor(treat)*as.factor(week),
data=m.tlc, method="REML",
correlation= corSymm(form=~1|id),
na.action=na.omit)
#What do you make of the na.action?
#next week: more on corClasses
summary(gls.profiles)
## Generalized least squares fit by REML
## Model: value ~ as.factor(treat) + as.factor(week) + as.factor(treat) * as.factor(week)
## Data: m.tlc
## AIC BIC logLik
## 2471.632 2531.201 -1220.816
##
## Correlation Structure: General
## Formula: ~1 | id
## Parameter estimate(s):
## Correlation:
## 1 2 3
## 2 0.596
## 3 0.582 0.769
## 4 0.536 0.552 0.551
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 26.272 0.9374730 28.024273 0.0000
## as.factor(treat)A 0.268 1.3257871 0.202144 0.8399
## as.factor(week)1 -1.612 0.8425878 -1.913154 0.0565
## as.factor(week)4 -2.202 0.8576242 -2.567558 0.0106
## as.factor(week)6 -2.626 0.9034129 -2.906755 0.0039
## as.factor(treat)A:as.factor(week)1 -11.406 1.1915990 -9.572012 0.0000
## as.factor(treat)A:as.factor(week)4 -8.824 1.2128637 -7.275343 0.0000
## as.factor(treat)A:as.factor(week)6 -3.152 1.2776188 -2.467090 0.0140
##
## Correlation:
## (Intr) as.()A as.()1 as.()4 as.()6
## as.factor(treat)A -0.707
## as.factor(week)1 -0.449 0.318
## as.factor(week)4 -0.457 0.323 0.719
## as.factor(week)6 -0.482 0.341 0.485 0.492
## as.factor(treat)A:as.factor(week)1 0.318 -0.449 -0.707 -0.508 -0.343
## as.factor(treat)A:as.factor(week)4 0.323 -0.457 -0.508 -0.707 -0.348
## as.factor(treat)A:as.factor(week)6 0.341 -0.482 -0.343 -0.348 -0.707
## a.()A:.()1 a.()A:.()4
## as.factor(treat)A
## as.factor(week)1
## as.factor(week)4
## as.factor(week)6
## as.factor(treat)A:as.factor(week)1
## as.factor(treat)A:as.factor(week)4 0.719
## as.factor(treat)A:as.factor(week)6 0.485 0.492
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.5135258 -0.6970199 -0.1497978 0.5540105 6.5075307
##
## Residual standard error: 6.628935
## Degrees of freedom: 400 total; 392 residual
AIC(gls.profiles)
## [1] 2471.632
#Means of placebo and treated
#observed group means of blood lead at each measurement week
placebo.mean <- by(m.tlc$value[m.tlc$treat=="P"],
m.tlc$week[m.tlc$treat=="P"],
mean, na.rm=T)
agent.mean <- by(m.tlc$value[m.tlc$treat=="A"],
m.tlc$week[m.tlc$treat=="A"],
mean)
#Plot Expectations
#build fitted group profiles from the GLS coefficient vector:
#placebo = intercept + week effects; agent adds the treatment main
#effect and the treatment-by-week interactions
a<-gls.profiles$coefficients
time<-c(0,1,4,6)
placebo<-c(a[1],a[1]+a[3],a[1]+a[4],a[1]+a[5])
agent<-c(a[1]+a[2],a[1]+a[2]+a[3]+a[6],
a[1]+a[2]+a[4]+a[7],a[1]+a[2]+a[5]+a[8])
plot(y=placebo,x=time,type='l',ylim=c(10,30))
lines(y=agent,x=time,lty=2)
#overlay observed means: open circles = placebo, filled dots = treated
points(y=placebo.mean,x=time)
points(y=agent.mean,x=time,pch=20)
#Do over-time response profiles differ by group?
#Wald test
#jointly test that the three interaction coefficients (positions 6-8
#of the 8-element coefficient vector) all equal zero
rhs<-c(0,0,0)
hm<-rbind(
c(0,0,0,0,0,1,0,0),
c(0,0,0,0,0,0,1,0),
c(0,0,0,0,0,0,0,1)
)
linearHypothesis(gls.profiles,hm,rhs)
## Linear hypothesis test
##
## Hypothesis:
## as.factor(treat)A:as.factor(week)1 = 0
## as.factor(treat)A:as.factor(week)4 = 0
## as.factor(treat)A:as.factor(week)6 = 0
##
## Model 1: restricted model
## Model 2: value ~ as.factor(treat) + as.factor(week) + as.factor(treat) *
## as.factor(week)
##
## Df Chisq Pr(>Chisq)
## 1
## 2 3 99.249 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#LR test (notice re-estimation with ML)
#REML likelihoods are not comparable across different mean structures,
#so both models are refit by ML before the anova() likelihood-ratio test
gls.ml<-gls(value~as.factor(treat)+as.factor(week)+
as.factor(treat)*as.factor(week),
data=m.tlc, method="ML",
correlation= corSymm(form=~1|id))
no.inter<-gls(value~as.factor(treat)+as.factor(week),
data=m.tlc, method="ML",
correlation= corSymm(form=~1|id))
anova(gls.ml,no.inter)
## Model df AIC BIC logLik Test L.Ratio p-value
## gls.ml 1 15 2481.445 2541.317 -1225.722
## no.inter 2 12 2541.949 2589.847 -1258.975 1 vs 2 66.50439 <.0001
###PARAMETRIC CURVES###
#Linear Time Trend
#replace the week dummies with a continuous linear trend in week
linear<-gls(value~as.factor(treat)+week+
as.factor(treat)*week, data=m.tlc,
method="REML", correlation= corSymm(form=~1|id))
summary(linear)
## Generalized least squares fit by REML
## Model: value ~ as.factor(treat) + week + as.factor(treat) * week
## Data: m.tlc
## AIC BIC logLik
## 2595.58 2639.375 -1286.79
##
## Correlation Structure: General
## Formula: ~1 | id
## Parameter estimate(s):
## Correlation:
## 1 2 3
## 2 -0.021
## 3 0.194 0.779
## 4 0.570 0.336 0.460
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 25.632659 0.7438572 34.45911 0.0000
## as.factor(treat)A -5.431447 1.0519730 -5.16310 0.0000
## week -0.344673 0.1220539 -2.82394 0.0050
## as.factor(treat)A:week 0.091068 0.1726103 0.52760 0.5981
##
## Correlation:
## (Intr) as.()A week
## as.factor(treat)A -0.707
## week -0.140 0.099
## as.factor(treat)A:week 0.099 -0.140 -0.707
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.29776505 -0.65413448 -0.06926895 0.57725972 6.05949763
##
## Residual standard error: 7.462733
## Degrees of freedom: 400 total; 396 residual
AIC(linear)
## [1] 2595.58
#Quadratic Time Trend
#add squared-week terms (I() protects ^ from formula interpretation)
quadratic<-gls(value~as.factor(treat)+week+I(week^2)+
as.factor(treat)*week+as.factor(treat)*I(week^2),
data=m.tlc, method="REML", correlation= corSymm(form=~1|id))
summary(quadratic)
## Generalized least squares fit by REML
## Model: value ~ as.factor(treat) + week + I(week^2) + as.factor(treat) * week + as.factor(treat) * I(week^2)
## Data: m.tlc
## AIC BIC logLik
## 2562.444 2614.136 -1268.222
##
## Correlation Structure: General
## Formula: ~1 | id
## Parameter estimate(s):
## Correlation:
## 1 2 3
## 2 0.236
## 3 0.592 0.615
## 4 0.427 0.529 0.526
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 25.790988 0.7831345 32.93302 0.0000
## as.factor(treat)A -3.332650 1.1075195 -3.00911 0.0028
## week -0.758501 0.5610096 -1.35203 0.1771
## I(week^2) 0.070594 0.0965306 0.73131 0.4650
## as.factor(treat)A:week -5.435095 0.7933873 -6.85049 0.0000
## as.factor(treat)A:I(week^2) 0.946460 0.1365149 6.93302 0.0000
##
## Correlation:
## (Intr) as.()A week I(w^2) as.()A:
## as.factor(treat)A -0.707
## week -0.220 0.156
## I(week^2) 0.171 -0.121 -0.978
## as.factor(treat)A:week 0.156 -0.220 -0.707 0.691
## as.factor(treat)A:I(week^2) -0.121 0.171 0.691 -0.707 -0.978
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.59012968 -0.65648466 -0.09227529 0.59984631 6.10631369
##
## Residual standard error: 6.876374
## Degrees of freedom: 400 total; 394 residual
AIC(quadratic)
## [1] 2562.444
#Spline with One Knot
#w1 = (week - 1) beyond the knot at week 1, and 0 before it
m.tlc$w1<-pmax(m.tlc$week-1,0) #note: parallel maxima
#note: the name 'spline' shadows stats::spline for the rest of the session
spline<-gls(value~as.factor(treat)+week+w1+as.factor(treat)*week+
as.factor(treat)*w1, data=m.tlc, method="REML",
correlation= corSymm(form=~1|id))
summary(spline) #we included the treatment main effect
## Generalized least squares fit by REML
## Model: value ~ as.factor(treat) + week + w1 + as.factor(treat) * week + as.factor(treat) * w1
## Data: m.tlc
## AIC BIC logLik
## 2488.123 2539.816 -1231.062
##
## Correlation Structure: General
## Formula: ~1 | id
## Parameter estimate(s):
## Correlation:
## 1 2 3
## 2 0.600
## 3 0.577 0.762
## 4 0.532 0.549 0.528
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 26.271418 0.9440823 27.827467 0.0000
## as.factor(treat)A 0.342922 1.3351340 0.256844 0.7974
## week -1.612527 0.8442870 -1.909927 0.0569
## w1 1.411934 0.9335284 1.512470 0.1312
## as.factor(treat)A:week -11.338158 1.1940021 -9.495928 0.0000
## as.factor(treat)A:w1 12.704573 1.3202085 9.623156 0.0000
##
## Correlation:
## (Intr) as.()A week w1 as.()A:
## as.factor(treat)A -0.707
## week -0.448 0.317
## w1 0.393 -0.278 -0.989
## as.factor(treat)A:week 0.317 -0.448 -0.707 0.699
## as.factor(treat)A:w1 -0.278 0.393 0.699 -0.707 -0.989
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.3051791 -0.6828107 -0.1297304 0.5559840 6.6503051
##
## Residual standard error: 6.677473
## Degrees of freedom: 400 total; 394 residual
AIC(spline)
## [1] 2488.123
#draw pictures and compare to response profiles
#evaluate each fitted curve on a fine grid from week 0 to week 6
time.ruler<-seq(0,6,by=.01)
spline.ruler<-pmax(time.ruler-1,0)
#prediction frames: recycle row 1 of m.tlc, then overwrite by column
#position (assumes col 2 = treat, col 3 = week, col 5 = w1 -- TODO
#confirm against the reshape above)
newdata.P<-m.tlc[rep(1,601),]
newdata.P[,2]<-as.factor("P")
newdata.P[,3]<-time.ruler
newdata.P[,5]<-spline.ruler
newdata.A<-m.tlc[rep(1,601),]
newdata.A[,2]<-as.factor("A")
newdata.A[,3]<-time.ruler
newdata.A[,5]<-spline.ruler
placebo.linear<-predict(linear,newdata=newdata.P)
agent.linear<-predict(linear,newdata=newdata.A)
placebo.quadratic<-predict(quadratic,newdata=newdata.P)
agent.quadratic<-predict(quadratic,newdata=newdata.A)
placebo.spline<-predict(spline,newdata=newdata.P)
agent.spline<-predict(spline,newdata=newdata.A)
#plot all forms of our model in the same space (calls on objects created in response profile code)
#also try adding one model's lines at a time
#points = observed group means; black lines = response profiles;
#blue = linear, red = quadratic, green = spline (dashed = treated group)
plot(y=placebo.mean,x=time,type='p',ylim=c(10,30))
points(y=agent.mean,x=time,pch=20)
lines(y=placebo,x=time)
lines(y=agent,x=time,lty=2)
lines(y=placebo.linear,x=time.ruler,col='blue')
lines(y=agent.linear,x=time.ruler,col='blue',lty=2)
lines(y=placebo.quadratic,x=time.ruler,col='red')
lines(y=agent.quadratic,x=time.ruler,col='red',lty=2)
lines(y=placebo.spline,x=time.ruler,col='forestgreen')
lines(y=agent.spline,x=time.ruler,col='forestgreen',lty=2)
#clean up
rm(list=ls())
#Load libraries
library(nlme)
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:Matrix':
##
## expand
library(car)
###DATA MANAGEMENT###
#load data (missing is listed as ".")
#exercise<-read.table(file.choose(), header=TRUE, sep="")
exercise.0<-read.table("exercise.txt", header=TRUE, sep="", na.strings=".")
#listwise deletion (Gasp!)
exercise<-na.omit(exercise.0)
#reshape data from wide (d0...d12) to long via reshape::melt.data.frame
m.exercise<-melt.data.frame(data=exercise,
measure.vars=c("d0","d2","d4","d6",
"d8","d10","d12"),
id=c("id","prog"))
m.exercise$value<-as.numeric(m.exercise$value)
#create variable for day by stripping the leading "d" off the old name
m.exercise$day<-as.numeric(substr(m.exercise$variable,2,3))
#Subset to make waves uneven for the purpose of illustration
#NOTE(review): the name 'data' shadows utils::data from here on
data<-subset(m.exercise, day==0 | day==4 | day==6 | day==8 | day==12)
#recode program as a 0/1 treatment dummy (presumably prog is coded 1/2,
#so prog==1 becomes the 0 reference -- confirm against exercise.txt)
data$treat<-data$prog-1
###CHOOSE A COVARIANCE STRUCTURE###
##Unstructured##
#unstructured within-subject correlation: one free parameter per pair
#of waves (most flexible, most parameters)
unstructured<-gls(value~treat+as.factor(day)+treat*as.factor(day),
data=data, method="REML", na.action=na.omit,
correlation= corSymm(form=~1|id))
summary(unstructured)
## Generalized least squares fit by REML
## Model: value ~ treat + as.factor(day) + treat * as.factor(day)
## Data: data
## AIC BIC logLik
## 407.6288 463.362 -182.8144
##
## Correlation Structure: General
## Formula: ~1 | id
## Parameter estimate(s):
## Correlation:
## 1 2 3 4
## 2 0.940
## 3 0.917 0.958
## 4 0.906 0.961 0.960
## 5 0.872 0.914 0.933 0.959
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 79.20000 1.0668462 74.23751 0.0000
## treat 1.56923 1.4190381 1.10584 0.2713
## as.factor(day)4 1.40000 0.3704020 3.77968 0.0003
## as.factor(day)6 1.50000 0.4347482 3.45027 0.0008
## as.factor(day)8 2.30000 0.4628240 4.96949 0.0000
## as.factor(day)12 2.10000 0.5403588 3.88631 0.0002
## treat:as.factor(day)4 -0.47692 0.4926808 -0.96802 0.3353
## treat:as.factor(day)6 0.11538 0.5782692 0.19953 0.8422
## treat:as.factor(day)8 -1.06923 0.6156135 -1.73685 0.0853
## treat:as.factor(day)12 -0.17692 0.7187444 -0.24616 0.8060
##
## Correlation:
## (Intr) treat as.()4 as.()6 as.()8 a.()12 t:.()4
## treat -0.752
## as.factor(day)4 -0.174 0.131
## as.factor(day)6 -0.204 0.153 0.716
## as.factor(day)8 -0.217 0.163 0.763 0.774
## as.factor(day)12 -0.253 0.190 0.584 0.701 0.824
## treat:as.factor(day)4 0.131 -0.174 -0.752 -0.538 -0.574 -0.439
## treat:as.factor(day)6 0.153 -0.204 -0.538 -0.752 -0.582 -0.527 0.716
## treat:as.factor(day)8 0.163 -0.217 -0.574 -0.582 -0.752 -0.619 0.763
## treat:as.factor(day)12 0.190 -0.253 -0.439 -0.527 -0.619 -0.752 0.584
## t:.()6 t:.()8
## treat
## as.factor(day)4
## as.factor(day)6
## as.factor(day)8
## as.factor(day)12
## treat:as.factor(day)4
## treat:as.factor(day)6
## treat:as.factor(day)8 0.774
## treat:as.factor(day)12 0.701 0.824
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.07489555 -0.69657208 0.06840315 0.68289145 1.68955780
##
## Residual standard error: 3.373664
## Degrees of freedom: 115 total; 105 residual
AIC(unstructured)
## [1] 407.6288
##First-Order Autoregressive##
#AR(1): correlation decays as Phi^k in the number of waves apart;
#NOTE(review): form=~1|id uses observation order, so the uneven day
#spacing (0,4,6,8,12) is ignored here
ar.1<-gls(value~treat+as.factor(day)+treat*as.factor(day),
data=data, method="REML", na.action=na.omit,
correlation= corAR1(form=~1|id))
summary(ar.1)
## Generalized least squares fit by REML
## Model: value ~ treat + as.factor(day) + treat * as.factor(day)
## Data: data
## AIC BIC logLik
## 399.9096 431.7571 -187.9548
##
## Correlation Structure: AR(1)
## Formula: ~1 | id
## Parameter estimate(s):
## Phi
## 0.9546676
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 79.20000 1.0760813 73.60039 0.0000
## treat 1.56923 1.4313219 1.09635 0.2754
## as.factor(day)4 1.40000 0.3240145 4.32079 0.0000
## as.factor(day)6 1.50000 0.4530028 3.31124 0.0013
## as.factor(day)8 2.30000 0.5485383 4.19296 0.0001
## as.factor(day)12 2.10000 0.6262902 3.35308 0.0011
## treat:as.factor(day)4 -0.47692 0.4309796 -1.10660 0.2710
## treat:as.factor(day)6 0.11538 0.6025501 0.19149 0.8485
## treat:as.factor(day)8 -1.06923 0.7296241 -1.46545 0.1458
## treat:as.factor(day)12 -0.17692 0.8330438 -0.21238 0.8322
##
## Correlation:
## (Intr) treat as.()4 as.()6 as.()8 a.()12 t:.()4
## treat -0.752
## as.factor(day)4 -0.151 0.113
## as.factor(day)6 -0.210 0.158 0.699
## as.factor(day)8 -0.255 0.192 0.565 0.807
## as.factor(day)12 -0.291 0.219 0.484 0.691 0.856
## treat:as.factor(day)4 0.113 -0.151 -0.752 -0.526 -0.424 -0.364
## treat:as.factor(day)6 0.158 -0.210 -0.526 -0.752 -0.607 -0.520 0.699
## treat:as.factor(day)8 0.192 -0.255 -0.424 -0.607 -0.752 -0.644 0.565
## treat:as.factor(day)12 0.219 -0.291 -0.364 -0.520 -0.644 -0.752 0.484
## t:.()6 t:.()8
## treat
## as.factor(day)4
## as.factor(day)6
## as.factor(day)8
## as.factor(day)12
## treat:as.factor(day)4
## treat:as.factor(day)6
## treat:as.factor(day)8 0.807
## treat:as.factor(day)12 0.691 0.856
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.0570885 -0.6905940 0.0678161 0.6770308 1.6750578
##
## Residual standard error: 3.402868
## Degrees of freedom: 115 total; 105 residual
AIC(ar.1)
## [1] 399.9096
#What does the AR(1) look like?
# corAR1(form=~1|id) works on observation ORDER, so entry (i,j) of the
# implied within-subject correlation matrix is Phi^|i-j| even though the
# five measurement days (0, 4, 6, 8, 12) are unevenly spaced.
p.1<-0.9546676  # Phi estimate reported by summary(ar.1)
#remember: 0, 4, 6, 8, 12
# Build Phi^|i-j| for all pairs in one vectorized step instead of typing
# each row by hand; keep the "row1".."row5" names so the printed matrix
# matches the original rbind() construction (ar1.cor is reused later to
# form Sigma for the AR(1) strategy).
ar1.cor<-p.1^abs(outer(0:4, 0:4, "-"))
rownames(ar1.cor)<-paste0("row", 1:5)
ar1.cor
## [,1] [,2] [,3] [,4] [,5]
## row1 1.0000000 0.9546676 0.9113902 0.8700747 0.8306321
## row2 0.9546676 1.0000000 0.9546676 0.9113902 0.8700747
## row3 0.9113902 0.9546676 1.0000000 0.9546676 0.9113902
## row4 0.8700747 0.9113902 0.9546676 1.0000000 0.9546676
## row5 0.8306321 0.8700747 0.9113902 0.9546676 1.0000000
##Exponential (continuous-time AR(1))##
# corCAR1(form=~day|id) uses the ACTUAL day values, so the within-subject
# correlation between observations at days s and t is Phi^|s-t|; this
# respects the uneven spacing that corAR1 ignores.
exp.mod<-gls(value~treat+as.factor(day)+treat*as.factor(day),
data=data, method="REML", na.action=na.omit,
correlation= corCAR1(form=~day|id))
summary(exp.mod)
## Generalized least squares fit by REML
## Model: value ~ treat + as.factor(day) + treat * as.factor(day)
## Data: data
## AIC BIC logLik
## 401.0567 432.9042 -188.5283
##
## Correlation Structure: Continuous AR(1)
## Formula: ~day | id
## Parameter estimate(s):
## Phi
## 0.9835166
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 79.20000 1.0777090 73.48923 0.0000
## treat 1.56923 1.4334870 1.09469 0.2762
## as.factor(day)4 1.40000 0.3865395 3.62188 0.0005
## as.factor(day)6 1.50000 0.4695484 3.19456 0.0018
## as.factor(day)8 2.30000 0.5377873 4.27678 0.0000
## as.factor(day)12 2.10000 0.6480935 3.24027 0.0016
## treat:as.factor(day)4 -0.47692 0.5141456 -0.92760 0.3557
## treat:as.factor(day)6 0.11538 0.6245577 0.18475 0.8538
## treat:as.factor(day)8 -1.06923 0.7153239 -1.49475 0.1380
## treat:as.factor(day)12 -0.17692 0.8620450 -0.20524 0.8378
##
## Correlation:
## (Intr) treat as.()4 as.()6 as.()8 a.()12 t:.()4
## treat -0.752
## as.factor(day)4 -0.179 0.135
## as.factor(day)6 -0.218 0.164 0.810
## as.factor(day)8 -0.250 0.188 0.696 0.859
## as.factor(day)12 -0.301 0.226 0.559 0.690 0.803
## treat:as.factor(day)4 0.135 -0.179 -0.752 -0.609 -0.523 -0.420
## treat:as.factor(day)6 0.164 -0.218 -0.609 -0.752 -0.646 -0.519 0.810
## treat:as.factor(day)8 0.188 -0.250 -0.523 -0.646 -0.752 -0.604 0.696
## treat:as.factor(day)12 0.226 -0.301 -0.420 -0.519 -0.604 -0.752 0.559
## t:.()6 t:.()8
## treat
## as.factor(day)4
## as.factor(day)6
## as.factor(day)8
## as.factor(day)12
## treat:as.factor(day)4
## treat:as.factor(day)6
## treat:as.factor(day)8 0.859
## treat:as.factor(day)12 0.690 0.803
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.05398155 -0.68955095 0.06771368 0.67600821 1.67252783
##
## Residual standard error: 3.408015
## Degrees of freedom: 115 total; 105 residual
# AIC of the continuous-time AR(1) fit, for model comparison
AIC(exp.mod)
## [1] 401.0567
#What does the exponential look like?
# corCAR1 implies entry (i,j) = Phi^|day_i - day_j| for measurement days
# 0, 4, 6, 8, 12.
p<-0.9835166  # Phi estimate reported by summary(exp.mod)
#remember: 0, 4, 6, 8, 12
days<-c(0, 4, 6, 8, 12)
# Vectorized Phi^|gap| for all day pairs, replacing the hand-typed rows;
# the "r1".."r5" row names match the original rbind() output.
exp.cor<-p^abs(outer(days, days, "-"))
rownames(exp.cor)<-paste0("r", 1:5)
exp.cor
## [,1] [,2] [,3] [,4] [,5]
## r1 1.0000000 0.9356788 0.9050867 0.8754948 0.8191819
## r2 0.9356788 1.0000000 0.9673049 0.9356788 0.8754948
## r3 0.9050867 0.9673049 1.0000000 0.9673049 0.9050867
## r4 0.8754948 0.9356788 0.9673049 1.0000000 0.9356788
## r5 0.8191819 0.8754948 0.9050867 0.9356788 1.0000000
#Toeplitz (back door)
# An AR(4) within-subject error process: with only five observations per
# subject, four AR coefficients give every lag its own correlation, which
# approximates an unrestricted banded (Toeplitz) structure.
ar.4<-gls(value~treat+as.factor(day)+treat*as.factor(day),
data=data, method="REML", na.action=na.omit,
correlation= corARMA(p=4, form=~1|id))
summary(ar.4)
## Generalized least squares fit by REML
## Model: value ~ treat + as.factor(day) + treat * as.factor(day)
## Data: data
## AIC BIC logLik
## 399.9912 439.8006 -184.9956
##
## Correlation Structure: ARMA(4,0)
## Formula: ~1 | id
## Parameter estimate(s):
## Phi1 Phi2 Phi3 Phi4
## 0.70405177 0.35048067 -0.12343874 0.03547153
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 79.20000 1.0819234 73.20297 0.0000
## treat 1.56923 1.4390926 1.09043 0.2780
## as.factor(day)4 1.40000 0.3242031 4.31828 0.0000
## as.factor(day)6 1.50000 0.3800323 3.94703 0.0001
## as.factor(day)8 2.30000 0.4695994 4.89779 0.0000
## as.factor(day)12 2.10000 0.5207479 4.03266 0.0001
## treat:as.factor(day)4 -0.47692 0.4312304 -1.10596 0.2713
## treat:as.factor(day)6 0.11538 0.5054902 0.22826 0.8199
## treat:as.factor(day)8 -1.06923 0.6246255 -1.71179 0.0899
## treat:as.factor(day)12 -0.17692 0.6926594 -0.25543 0.7989
##
## Correlation:
## (Intr) treat as.()4 as.()6 as.()8 a.()12 t:.()4
## treat -0.752
## as.factor(day)4 -0.150 0.113
## as.factor(day)6 -0.176 0.132 0.586
## as.factor(day)8 -0.217 0.163 0.595 0.728
## as.factor(day)12 -0.241 0.181 0.461 0.685 0.790
## treat:as.factor(day)4 0.113 -0.150 -0.752 -0.441 -0.447 -0.347
## treat:as.factor(day)6 0.132 -0.176 -0.441 -0.752 -0.547 -0.515 0.586
## treat:as.factor(day)8 0.163 -0.217 -0.447 -0.547 -0.752 -0.594 0.595
## treat:as.factor(day)12 0.181 -0.241 -0.347 -0.515 -0.594 -0.752 0.461
## t:.()6 t:.()8
## treat
## as.factor(day)4
## as.factor(day)6
## as.factor(day)8
## as.factor(day)12
## treat:as.factor(day)4
## treat:as.factor(day)6
## treat:as.factor(day)8 0.728
## treat:as.factor(day)12 0.685 0.790
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.04598073 -0.68686496 0.06744991 0.67337498 1.66601288
##
## Residual standard error: 3.421342
## Degrees of freedom: 115 total; 105 residual
# AIC of the AR(4) ("Toeplitz back door") fit, for model comparison
AIC(ar.4)
## [1] 399.9912
#What does it look like?
# Back out implied autocorrelations at lags 1-4 from the estimated AR(4)
# coefficients.
# NOTE(review): these plug-in expressions are a rough shortcut, not the
# Yule-Walker solution for an AR(4) ACF; the implied sequence is not even
# monotone (t.2 > t.1 below).  Confirm against the source text before
# relying on these numbers.
t.0<-1
t.1<-0.70405177
t.2<-0.70405177^2 + 0.35048067
t.3<- 0.70405177^3 + 0.35048067^2 -0.12343874
t.4<- 0.70405177^4 + 0.35048067^3 +(-0.12343874)^2 + 0.03547153
# Assemble a symmetric Toeplitz matrix from the lag-0..4 values
r1 <-c(t.0,t.1,t.2,t.3,t.4)
r2 <-c(t.1,t.0,t.1,t.2,t.3)
r3 <-c(t.2,t.1,t.0,t.1,t.2)
r4 <-c(t.3,t.2,t.1,t.0,t.1)
r5 <-c(t.4,t.3,t.2,t.1,t.0)
toep.cor<-rbind(r1,r2,r3,r4,r5); toep.cor
## [,1] [,2] [,3] [,4] [,5]
## r1 1.0000000 0.7040518 0.8461696 0.3483886 0.3394680
## r2 0.7040518 1.0000000 0.7040518 0.8461696 0.3483886
## r3 0.8461696 0.7040518 1.0000000 0.7040518 0.8461696
## r4 0.3483886 0.8461696 0.7040518 1.0000000 0.7040518
## r5 0.3394680 0.3483886 0.8461696 0.7040518 1.0000000
###RESIDUAL ANALYSIS###
#create transformed residuals
# Attach raw residuals and fitted values from the AR(1) GLS fit
data$resid<-ar.1$resid
data$yhat<-fitted(ar.1)
# Implied within-subject covariance matrices (sigma^2 x correlation).
# NOTE(review): `unstructured` is a gls fit created earlier in this file;
# the $'2' element pulls one subject's estimated correlation matrix from
# its corStruct -- verify that subject 2 is complete (5 observations).
alt.Sigma<-(unstructured$sigma^2)*as.matrix(unstructured$modelStruct$corStruct)$'2' #strategy for unstructured
Sigma<-(ar.1$sigma^2)*ar1.cor #strategy for first-order autoregressive
# Cholesky factor: alt.Sigma = L %*% t(L).  Premultiplying a subject's
# residual vector by L^{-1} decorrelates and standardizes it.
tL<-chol(alt.Sigma)
L<-t(tL)
# Sanity check: L %*% tL should reproduce alt.Sigma
L%*%tL
## [,1] [,2] [,3] [,4] [,5]
## [1,] 11.381607 10.69562 10.43658 10.31058 9.921669
## [2,] 10.695619 11.38161 10.90379 10.93331 10.405237
## [3,] 10.436577 10.90379 11.38161 10.92224 10.623756
## [4,] 10.310577 10.93331 10.92224 11.38161 10.910219
## [5,] 9.921669 10.40524 10.62376 10.91022 11.381607
inv.lower<-solve(L)
# Decorrelate each subject's residuals and fitted values.
# (Looping over data$id revisits each id once per row; redundant but the
# repeated assignments are identical, so the result is unchanged.)
for(i in data$id){
data$std[data$id==i]<-inv.lower%*%as.matrix(data$resid[data$id==i])
data$fit.std[data$id==i]<-inv.lower%*%as.matrix(data$yhat[data$id==i])
}
#plot transformed residuals against transformed fitted values
# (should show no pattern if the covariance model is adequate)
plot(y=data$std,x=data$fit.std)
plot(y=data$std,x=data$day)
#plot untransformed residuals against fitted values, for contrast
plot(y=data$resid,x=data$yhat)
plot(y=data$resid,x=data$day)
#semivariogram of normalized residuals (requires the nlme fit ar.1)
#plot(Variogram(ar.1, form=~day|id,resType="normalized"))
#contrast: raw-residual semivariogram
#plot(Variogram(ar.1, form=~day|id))
#Mahalanobis distance
# d_i = r_i' r_i for subject i's standardized residual vector; under the
# model d_i ~ chi-square(n_i), so p flags subjects whose whole residual
# profile is unusually large.
data$d<-rep(NA,nrow(data))
data$p<-rep(NA,nrow(data))
for(i in unique(data$id)){
r<-as.vector(data$std[data$id==i])
d.i<-sum(r^2)  # scalar equivalent of t(r) %*% r
data$d[data$id==i]<-d.i
# BUG FIX: the original computed pchisq(data$d[i], ...), which indexes
# ROW i of the data frame rather than subject i's distance, making the
# p-values depend on row order.  Use the subject's own distance.
data$p[data$id==i]<-1-pchisq(d.i,df=length(r))
}
# Which subjects have a residual profile with p < .05?
table(data$id[data$p<.05])
##
## 12 35
## 5 5
# Share of subjects flagged (23 subjects total is hard-coded here;
# expect about 5% under a correct model)
length(table(data$id[data$p<.05]))/23
## [1] 0.08695652
#clean up
rm(list=ls())
#packages
library(faraway)
##
## Attaching package: 'faraway'
## The following object is masked from 'package:lattice':
##
## melanoma
## The following objects are masked from 'package:car':
##
## logit, vif
library(lme4)
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
library(nlme)
library(lattice)
#load data: Panel Study of Income Dynamics subset shipped with faraway
data(psid)
#View the data: per-person income trajectories by sex, and income by year
# (+100 guards against log(0) for zero-income years)
xyplot(log(income+100)~year|sex, psid, type='l', groups=person)
boxplot(log(income+100)~year, psid)
#Create a variable: "centered year" (0 = 1978, mid-panel)
psid$cyear <- psid$year-78
#Run a mixed-effects model with random intercept only
#Syntax for "lme4" library
income.mod.int <- lmer(log(income)~cyear*sex+age+educ+(1|person), data=psid)
summary(income.mod.int)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (1 | person)
## Data: psid
##
## REML criterion at convergence: 3964.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.5182 -0.2257 0.1282 0.4563 2.5309
##
## Random effects:
## Groups Name Variance Std.Dev.
## person (Intercept) 0.2893 0.5379
## Residual 0.5530 0.7436
## Number of obs: 1661, groups: person, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.753055 0.558674 12.088
## cyear 0.080155 0.004464 17.955
## sexM 1.126062 0.123062 9.150
## age 0.008724 0.013895 0.628
## educ 0.106692 0.022073 4.834
## cyear:sexM -0.018597 0.005949 -3.126
##
## Correlation of Fixed Effects:
## (Intr) cyear sexM age educ
## cyear -0.012
## sexM -0.104 0.039
## age -0.874 0.005 -0.025
## educ -0.598 -0.001 0.010 0.167
## cyear:sexM 0.029 -0.750 -0.027 -0.017 -0.018
#Syntax for the "nlme" library's lme(): same random-intercept model
# NOTE(review): the trailing "+cyear" is redundant (cyear*sex already
# includes it); R drops the duplicate term, so the fit is unaffected.
income.mod.int.2 <- lme(log(income)~cyear*sex+age+educ+cyear,
random=~1|person, data=psid)
summary(income.mod.int.2)
## Linear mixed-effects model fit by REML
## Data: psid
## AIC BIC logLik
## 3980.726 4024.019 -1982.363
##
## Random effects:
## Formula: ~1 | person
## (Intercept) Residual
## StdDev: 0.5378884 0.7436354
##
## Fixed effects: log(income) ~ cyear * sex + age + educ + cyear
## Value Std.Error DF t-value p-value
## (Intercept) 6.753055 0.5586742 1574 12.087644 0.0000
## cyear 0.080155 0.0044641 1574 17.955301 0.0000
## sexM 1.126062 0.1230617 81 9.150389 0.0000
## age 0.008724 0.0138951 81 0.627846 0.5319
## educ 0.106692 0.0220734 81 4.833502 0.0000
## cyear:sexM -0.018597 0.0059491 1574 -3.126082 0.0018
## Correlation:
## (Intr) cyear sexM age educ
## cyear -0.012
## sexM -0.104 0.039
## age -0.874 0.005 -0.025
## educ -0.598 -0.001 0.010 0.167
## cyear:sexM 0.029 -0.750 -0.027 -0.017 -0.018
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -9.5182099 -0.2257278 0.1281839 0.4562720 2.5308800
##
## Number of Observations: 1661
## Number of Groups: 85
#Try a fixed effects model: one dummy per person instead of a random
# intercept.  Person dummies absorb all time-constant traits, so some
# person-level coefficients become inestimable (3 dropped as singular in
# the output below).
income.mod.fe <- lm(log(income)~cyear*sex+age+educ+cyear+as.factor(person),
data=psid)
summary(income.mod.fe)
##
## Call:
## lm(formula = log(income) ~ cyear * sex + age + educ + cyear +
## as.factor(person), data = psid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9895 -0.1673 0.0816 0.3359 2.0169
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.145410 12.920967 0.785 0.43246
## cyear 0.079958 0.004483 17.835 < 2e-16 ***
## sexM 1.145185 1.660946 0.689 0.49062
## age -0.107610 0.409948 -0.262 0.79297
## educ 0.115639 0.164272 0.704 0.48157
## as.factor(person)2 -0.179434 0.326953 -0.549 0.58322
## as.factor(person)3 0.507549 3.304448 0.154 0.87795
## as.factor(person)4 0.123132 0.594901 0.207 0.83605
## as.factor(person)5 -1.221613 0.384218 -3.179 0.00150 **
## as.factor(person)6 0.682249 1.334056 0.511 0.60914
## as.factor(person)7 -0.133064 0.602578 -0.221 0.82526
## as.factor(person)8 -0.281718 2.964826 -0.095 0.92431
## as.factor(person)9 -0.354659 0.761349 -0.466 0.64140
## as.factor(person)10 -0.367622 1.978114 -0.186 0.85259
## as.factor(person)11 1.110915 3.203989 0.347 0.72884
## as.factor(person)12 0.477826 2.060144 0.232 0.81662
## as.factor(person)13 1.187416 3.371969 0.352 0.72478
## as.factor(person)14 -0.023231 1.098891 -0.021 0.98314
## as.factor(person)15 0.219048 1.417092 0.155 0.87718
## as.factor(person)16 0.493176 2.716483 0.182 0.85596
## as.factor(person)17 1.146691 4.122914 0.278 0.78095
## as.factor(person)18 0.542061 4.193835 0.129 0.89717
## as.factor(person)19 0.719695 1.334056 0.539 0.58963
## as.factor(person)20 1.498543 2.963639 0.506 0.61318
## as.factor(person)21 1.104357 4.266322 0.259 0.79578
## as.factor(person)22 -0.702777 0.776235 -0.905 0.36541
## as.factor(person)23 0.106271 0.453017 0.235 0.81456
## as.factor(person)24 -0.571459 1.357893 -0.421 0.67393
## as.factor(person)25 -0.289934 1.820636 -0.159 0.87349
## as.factor(person)26 0.477732 2.558154 0.187 0.85188
## as.factor(person)27 0.600111 3.375136 0.178 0.85890
## as.factor(person)28 0.579976 2.896383 0.200 0.84132
## as.factor(person)29 0.147055 2.389453 0.062 0.95093
## as.factor(person)30 -1.213550 1.980442 -0.613 0.54012
## as.factor(person)31 -1.063418 3.373897 -0.315 0.75266
## as.factor(person)32 0.080251 0.557841 0.144 0.88563
## as.factor(person)33 1.033991 4.691802 0.220 0.82560
## as.factor(person)34 -1.048944 0.219302 -4.783 0.00000189 ***
## as.factor(person)35 0.800893 1.739416 0.460 0.64527
## as.factor(person)36 -0.233985 0.989035 -0.237 0.81301
## as.factor(person)37 -0.340023 0.551710 -0.616 0.53778
## as.factor(person)38 0.664940 1.901703 0.350 0.72664
## as.factor(person)39 -0.493901 1.759288 -0.281 0.77895
## as.factor(person)40 0.483949 2.389453 0.203 0.83952
## as.factor(person)41 0.756165 1.990453 0.380 0.70407
## as.factor(person)42 -0.350022 0.487242 -0.718 0.47263
## as.factor(person)43 1.478191 4.601803 0.321 0.74809
## as.factor(person)44 0.696375 1.744784 0.399 0.68986
## as.factor(person)45 -1.401859 0.761350 -1.841 0.06577 .
## as.factor(person)46 1.556486 5.011757 0.311 0.75617
## as.factor(person)47 0.627801 3.611950 0.174 0.86204
## as.factor(person)48 -0.224764 0.613448 -0.366 0.71412
## as.factor(person)49 2.398449 2.963331 0.809 0.41842
## as.factor(person)50 -0.675815 0.234218 -2.885 0.00396 **
## as.factor(person)51 -0.654761 3.783755 -0.173 0.86264
## as.factor(person)52 0.798271 1.824574 0.438 0.66180
## as.factor(person)53 0.254131 2.146520 0.118 0.90577
## as.factor(person)54 -0.095449 4.924972 -0.019 0.98454
## as.factor(person)55 0.576077 0.347161 1.659 0.09724 .
## as.factor(person)56 0.283898 0.787021 0.361 0.71835
## as.factor(person)57 -0.716431 1.641590 -0.436 0.66259
## as.factor(person)58 -0.146621 0.861726 -0.170 0.86492
## as.factor(person)59 1.187977 1.739788 0.683 0.49482
## as.factor(person)60 0.095620 1.334056 0.072 0.94287
## as.factor(person)61 -0.483417 1.571585 -0.308 0.75843
## as.factor(person)62 -0.427260 2.388930 -0.179 0.85808
## as.factor(person)63 2.108865 3.713308 0.568 0.57017
## as.factor(person)64 -0.307102 0.938117 -0.327 0.74344
## as.factor(person)65 -0.026107 1.335588 -0.020 0.98441
## as.factor(person)66 0.298867 2.806711 0.106 0.91521
## as.factor(person)67 -0.392233 2.869202 -0.137 0.89128
## as.factor(person)68 1.446472 0.700572 2.065 0.03911 *
## as.factor(person)69 -0.734596 0.272163 -2.699 0.00703 **
## as.factor(person)70 0.223412 0.264186 0.846 0.39787
## as.factor(person)71 0.177656 0.334595 0.531 0.59552
## as.factor(person)72 0.132741 0.678126 0.196 0.84483
## as.factor(person)73 0.359924 0.994736 0.362 0.71753
## as.factor(person)74 0.327971 0.788095 0.416 0.67735
## as.factor(person)75 0.022977 0.865110 0.027 0.97881
## as.factor(person)76 1.110517 2.716483 0.409 0.68274
## as.factor(person)77 1.766549 3.896585 0.453 0.65035
## as.factor(person)78 0.402196 0.758975 0.530 0.59624
## as.factor(person)79 0.018154 3.783334 0.005 0.99617
## as.factor(person)80 -1.165715 0.336220 -3.467 0.00054 ***
## as.factor(person)81 1.111121 2.965305 0.375 0.70793
## as.factor(person)82 0.664343 5.245187 0.127 0.89923
## as.factor(person)83 NA NA NA NA
## as.factor(person)84 NA NA NA NA
## as.factor(person)85 NA NA NA NA
## cyear:sexM -0.018738 0.005972 -3.138 0.00173 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7437 on 1574 degrees of freedom
## Multiple R-squared: 0.6323, Adjusted R-squared: 0.6122
## F-statistic: 31.47 on 86 and 1574 DF, p-value: < 0.00000000000000022
#Run a mixed-effects model with random intercept and random slope of year
#Syntax for "lme4" library: (cyear|person) gives each person their own
# intercept and cyear slope, with an estimated correlation between them
income.mod <- lmer(log(income)~cyear*sex+age+educ+(cyear|person), data=psid)
summary(income.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
## Data: psid
##
## REML criterion at convergence: 3819.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -10.2310 -0.2134 0.0795 0.4147 2.8254
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## person (Intercept) 0.2817 0.53071
## cyear 0.0024 0.04899 0.19
## Residual 0.4673 0.68357
## Number of obs: 1661, groups: person, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.67420 0.54332 12.284
## cyear 0.08531 0.00900 9.480
## sexM 1.15031 0.12129 9.484
## age 0.01093 0.01352 0.808
## educ 0.10421 0.02144 4.861
## cyear:sexM -0.02631 0.01224 -2.150
##
## Correlation of Fixed Effects:
## (Intr) cyear sexM age educ
## cyear 0.020
## sexM -0.104 -0.098
## age -0.874 0.002 -0.026
## educ -0.597 0.000 0.008 0.167
## cyear:sexM -0.003 -0.735 0.156 -0.010 -0.011
#Syntax for the "nlme" library's lme(): same random intercept + slope model
# (the trailing "+cyear" duplicates a term already in cyear*sex; R drops it)
income.mod.2 <- lme(log(income)~cyear*sex+age+educ+cyear,
random=~cyear|person, data=psid)
summary(income.mod.2)
## Linear mixed-effects model fit by REML
## Data: psid
## AIC BIC logLik
## 3839.776 3893.892 -1909.888
##
## Random effects:
## Formula: ~cyear | person
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 0.53071321 (Intr)
## cyear 0.04898952 0.187
## Residual 0.68357323
##
## Fixed effects: log(income) ~ cyear * sex + age + educ + cyear
## Value Std.Error DF t-value p-value
## (Intercept) 6.674204 0.5433252 1574 12.283995 0.0000
## cyear 0.085312 0.0089996 1574 9.479521 0.0000
## sexM 1.150313 0.1212925 81 9.483790 0.0000
## age 0.010932 0.0135238 81 0.808342 0.4213
## educ 0.104210 0.0214366 81 4.861287 0.0000
## cyear:sexM -0.026307 0.0122378 1574 -2.149607 0.0317
## Correlation:
## (Intr) cyear sexM age educ
## cyear 0.020
## sexM -0.104 -0.098
## age -0.874 0.002 -0.026
## educ -0.597 0.000 0.008 0.167
## cyear:sexM -0.003 -0.735 0.156 -0.010 -0.011
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -10.23102885 -0.21344108 0.07945029 0.41471605 2.82543559
##
## Number of Observations: 1661
## Number of Groups: 85
###Predicting Random Effects###
#initialize variables: one row per person, columns = (intercept, slope) BLUPs
rand.eff<-matrix(NA,nrow=length(table(psid$person)),ncol=2)
#Create Covariance Matrix of Random Effects
# Values are hand-copied (rounded) from summary(income.mod):
# Var(intercept)=0.2817, Var(cyear)=0.0024, Corr=0.187 -- TODO: extract
# programmatically (VarCorr) to avoid rounding drift
a<-c(0.2816564, 0.53071*0.04899*0.187)
b<-c(0.53071*0.04899*0.187, 0.0024)
G<-rbind(a,b)
sigma.2<-0.4672724
#Add residuals to data set
psid$resid<-residuals(income.mod)
#Predict Random Effects with BLUP: b_i = G Z_i' V_i^{-1} r_i, where
# V_i = Z_i G Z_i' + sigma^2 I
# (looping over psid$person revisits each person once per row -- redundant
# but harmless; assumes person ids are 1..85 so i indexes rand.eff rows)
for(i in psid$person){
z<-cbind(1, psid$cyear[psid$person==i])
Sigma<-z%*%G%*%t(z)+diag(x=sigma.2, nrow=nrow(z))
rand.eff[i,]<-G%*%t(z)%*%solve(Sigma)%*%as.matrix(psid$resid[psid$person==i])
}
head(rand.eff)
## [,1] [,2]
## [1,] 0.008126650 0.005794171
## [2,] -0.004270293 0.003262456
## [3,] 0.006876111 -0.007389998
## [4,] 0.010418173 -0.001665729
## [5,] -0.022921505 -0.017969627
## [6,] 0.008693688 0.003539400
#create individual predicted coefficient vectors:
# fixed effects (income.mod@beta) repeated for every person, plus that
# person's BLUPs added to the first two columns (intercept and cyear)
ind.coefs<-matrix(income.mod@beta,
nrow=length(table(psid$person)),
ncol=6,byrow=T)+cbind(rand.eff,0,0,0,0)
head(ind.coefs)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 6.682331 0.09110630 1.150313 0.01093182 0.1042097 -0.02630651
## [2,] 6.669934 0.08857458 1.150313 0.01093182 0.1042097 -0.02630651
## [3,] 6.681081 0.07792213 1.150313 0.01093182 0.1042097 -0.02630651
## [4,] 6.684623 0.08364640 1.150313 0.01093182 0.1042097 -0.02630651
## [5,] 6.651283 0.06734250 1.150313 0.01093182 0.1042097 -0.02630651
## [6,] 6.682898 0.08885153 1.150313 0.01093182 0.1042097 -0.02630651
#predictions accounting for random effects, individual 1
psid$const<-1
# Design matrix columns must match ind.coefs order:
# const, cyear, sex (recoded numeric), age, educ, cyear:sex interaction
psid.no.1<-psid[psid$person==1,c("const","cyear","sex","age","educ")]
psid.no.1$sex<-1
psid.no.1$inter<-psid.no.1$sex*psid.no.1$cyear
predictions.1<-as.matrix(psid.no.1)%*%ind.coefs[1,]
#graph in two forms: logged and raw (exponentiated) income
plot(y=predictions.1,x=psid.no.1$cyear+1978,
ylab="Predicted Logged Income",
xlab="Year",main="Respondent 1",type='l')
plot(y=exp(predictions.1),x=psid.no.1$cyear+1978,
ylab="Predicted Income",
xlab="Year",main="Respondent 1",type='l')
###Residual Analyses###
#initialize variables
psid$std<-rep(NA,nrow(psid))
psid$d<-rep(NA,nrow(psid))
psid$p<-rep(NA,nrow(psid))
#Create Standardized Residuals: premultiply each person's residual vector
# by the inverse Cholesky factor of their implied covariance matrix
# V_i = Z_i G Z_i' + sigma^2 I, which decorrelates and standardizes them
# (loop over psid$person revisits each person once per row; redundant but
# the repeated assignments are identical)
for(i in psid$person){
z<-cbind(1, psid$cyear[psid$person==i])
Sigma<-z%*%G%*%t(z)+diag(x=sigma.2, nrow=nrow(z))
tL<-chol(Sigma)
inv.lower<-solve(t(tL))
psid$std[psid$person==i]<-inv.lower%*%as.matrix(psid$resid[psid$person==i])
}
#Mahalanobis Distance
# d_i = r_i' r_i for person i's standardized residual vector; under the
# model d_i ~ chi-square(n_i), so p flags people with unusual profiles.
for(i in unique(psid$person)){
r<-as.vector(psid$std[psid$person==i])
d.i<-sum(r^2)  # scalar equivalent of t(r) %*% r
psid$d[psid$person==i]<-d.i
# BUG FIX: the original computed pchisq(psid$d[i], ...), which indexes
# ROW i of the data frame rather than person i's distance, so p-values
# depended on row order.  Use the person's own distance.
psid$p[psid$person==i]<-1-pchisq(d.i,df=length(r))
}
# Which people are flagged at p < .05 (counts = observations per person)?
table(psid$person[psid$p<.05])
##
## 62 64 66 67 69 70 71 73 74 82 83 84 85
## 15 14 16 12 14 17 12 16 13 23 23 23 22
#Figures
#Histogram & Density Plot of Regular Residuals
hist(psid$resid)
densityplot(psid$resid)
#Histogram & Density Plot of Transformed (standardized) Residuals
hist(psid$std)
densityplot(psid$std)
#Transformed Residuals Against Fitted Values (should be patternless)
mod.fit<-fitted(income.mod)
plot(y=psid$std, x=mod.fit)
#Transformed Residuals Against Time
plot(y=psid$std, x=psid$cyear)
#Semi-Variogram (crafted for "nlme" library; uses the lme fit income.mod.2)
plot(Variogram(income.mod.2, form=~cyear|person), ylim=c(0,1.2))
##############################################################
#data: reload the faraway psid panel for the age-decomposition example
data(psid)
#Create a variable: "centered year" (0 = 1978)
psid$cyear <- psid$year-78
#Run a mixed-effects model with random intercept and random slope of year
#Syntax for "lme4" library
income.mod <- lmer(log(income)~cyear*sex+age+educ+(cyear|person), data=psid)
summary(income.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
## Data: psid
##
## REML criterion at convergence: 3819.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -10.2310 -0.2134 0.0795 0.4147 2.8254
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## person (Intercept) 0.2817 0.53071
## cyear 0.0024 0.04899 0.19
## Residual 0.4673 0.68357
## Number of obs: 1661, groups: person, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.67420 0.54332 12.284
## cyear 0.08531 0.00900 9.480
## sexM 1.15031 0.12129 9.484
## age 0.01093 0.01352 0.808
## educ 0.10421 0.02144 4.861
## cyear:sexM -0.02631 0.01224 -2.150
##
## Correlation of Fixed Effects:
## (Intr) cyear sexM age educ
## cyear 0.020
## sexM -0.104 -0.098
## age -0.874 0.002 -0.026
## educ -0.597 0.000 0.008 0.167
## cyear:sexM -0.003 -0.735 0.156 -0.010 -0.011
#Example of how to create a variable: first year each person is observed.
# ave() applies FUN within each person group and writes the result back to
# every row of that group, replacing the original loop over psid$person
# (which revisited each person once per ROW -- an O(n^2) pattern).
psid$year.2<-ave(psid$year, psid$person, FUN=function(v) v[1])
#Create a more interesting variable: years elapsed since the person
# entered the panel (0, 1, 2, ... in row order within person, exactly as
# the original per-person 0-based counter produced)
psid$age.2<-ave(psid$person, psid$person, FUN=seq_along)-1
#Age all jumbled together: baseline age plus elapsed panel time
psid$age.3<-psid$age+psid$age.2
#Decompose the cross-sectional (age at entry) and longitudinal (age.2,
# within-person aging) effects of age by entering them separately
income.mod.1 <- lmer(log(income)~age+age.2+educ+(1|person), data=psid)
summary(income.mod.1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ age + age.2 + educ + (1 | person)
## Data: psid
##
## REML criterion at convergence: 4002.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.3637 -0.2030 0.1262 0.4591 2.5084
##
## Random effects:
## Groups Name Variance Std.Dev.
## person (Intercept) 0.5452 0.7384
## Residual 0.5523 0.7432
## Number of obs: 1661, groups: person, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.684690 0.745378 8.968
## age 0.011047 0.018652 0.592
## age.2 0.073405 0.003081 23.826
## educ 0.103059 0.029608 3.481
##
## Correlation of Fixed Effects:
## (Intr) age age.2
## age -0.881
## age.2 -0.024 -0.008
## educ -0.599 0.166 -0.017
#Jumble everything together: age.3 mixes between- and within-person
# variation in age, for contrast with the decomposed model
income.mod.2 <- lmer(log(income)~age.3+educ+(1|person), data=psid)
summary(income.mod.2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ age.3 + educ + (1 | person)
## Data: psid
##
## REML criterion at convergence: 4006.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.3332 -0.2057 0.1272 0.4548 2.4957
##
## Random effects:
## Groups Name Variance Std.Dev.
## person (Intercept) 0.6115 0.7820
## Residual 0.5524 0.7432
## Number of obs: 1661, groups: person, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.557267 0.393758 11.574
## age.3 0.071814 0.003041 23.614
## educ 0.119273 0.030830 3.869
##
## Correlation of Fixed Effects:
## (Intr) age.3
## age.3 -0.330
## educ -0.921 0.011
#We're probably picking-up the effect of year here.
##############################################################
library(plm)
## Loading required package: Formula
##
## Attaching package: 'plm'
## The following object is masked from 'package:timeSeries':
##
## lag
library(lmtest)
library(pcse)
##
## Attaching package: 'pcse'
## The following object is masked from 'package:sandwich':
##
## vcovPC
library(nlme)
library(foreign)
#load data # Simulated Panel of 50 States Over 10 Years
#state <-read.dta("http://spia.uga.edu/faculty_pages/monogan/teaching/pd/STATE2.DTA",
# convert.factors=FALSE)
state <-read.dta("STATE2.DTA",convert.factors=FALSE)
### UNIT EFFECTS ###
#Is the mean of y the same across units?
# BUG FIX: the unit index must enter as a factor.  The original
# aov(y~state) treated the numeric state index as a continuous predictor
# (a 1-df linear trend -- see the Df column in the output below), which
# does not test equality of the 50 unit means (49 df).  The printed
# output came from the original numeric specification.
anova.mod<-aov(y~as.factor(state),data=state)
summary(anova.mod)
## Df Sum Sq Mean Sq F value Pr(>F)
## state 1 210 210 0.065 0.799
## Residuals 498 1619256 3252
#With an OLS model, is the mean of the residuals the same across units?
# First fit the pooled OLS baseline ignoring unit structure
main.model <- lm(y~x, data=state); summary(main.model)
##
## Call:
## lm(formula = y ~ x, data = state)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.99 -11.93 -0.25 12.94 24.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.76900 0.63107 80.45 <2e-16 ***
## x 1.96500 0.02249 87.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.11 on 498 degrees of freedom
## Multiple R-squared: 0.9388, Adjusted R-squared: 0.9386
## F-statistic: 7635 on 1 and 498 DF, p-value: < 0.00000000000000022
# Test whether the pooled-OLS residual means differ across units.
# BUG FIX: the unit index must be a factor; the original's numeric
# state$state fit a 1-df linear trend (see the Df column in the output
# below) rather than comparing the 50 unit means.  The printed output
# came from the numeric specification.
anova.resids <- aov(main.model$residuals~as.factor(state$state))
summary(anova.resids)
## Df Sum Sq Mean Sq F value Pr(>F)
## state$state 1 3 2.75 0.014 0.907
## Residuals 498 99161 199.12
#fixed effects model: "within" estimator demeans by state, so time-constant
# state traits are absorbed (no intercept reported)
fe.mod <- plm(y~x, data=state,index=c("state","time"),model="within")
summary(fe.mod)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = y ~ x, data = state, model = "within", index = c("state",
## "time"))
##
## Balanced Panel: n = 50, T = 10, N = 500
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -31.5999 -10.4457 -0.8056 10.7443 31.9038
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## x 1.969086 0.023423 84.066 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1478100
## Residual Sum of Squares: 88300
## R-Squared: 0.94026
## Adj. R-Squared: 0.93361
## F-statistic: 7067.11 on 1 and 449 DF, p-value: < 0.000000000000000222
#random effects model: GLS with a state-level variance component
# (Swamy-Arora transformation, per the output below)
re.mod <- plm(y~x, data=state,index=c("state","time"),model="random")
summary(re.mod)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = y ~ x, data = state, model = "random", index = c("state",
## "time"))
##
## Balanced Panel: n = 50, T = 10, N = 500
##
## Effects:
## var std.dev share
## idiosyncratic 196.660 14.024 0.986
## individual 2.827 1.681 0.014
## theta: 0.06495
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -27.26881 -11.79069 -0.37252 12.90005 24.77429
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 50.76900 0.67025 75.747 < 0.00000000000000022 ***
## x 1.96547 0.02246 87.510 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1601700
## Residual Sum of Squares: 97799
## R-Squared: 0.93894
## Adj. R-Squared: 0.93882
## F-statistic: 7657.97 on 1 and 498 DF, p-value: < 0.000000000000000222
#Is random effects a sufficient specification?  Hausman test: a small
# p-value would indicate RE is inconsistent and FE should be preferred
phtest(fe.mod, re.mod)
##
## Hausman Test
##
## data: y ~ x
## chisq = 0.29526, df = 1, p-value = 0.5869
## alternative hypothesis: one model is inconsistent
### SERIAL CORRELATION AND HETEROSCEDASTICITY ###
#Are residuals autocorrelated?
pbgtest(fe.mod)
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel
## models
##
## data: y ~ x
## chisq = 60.81, df = 10, p-value = 0.000000002546
## alternative hypothesis: serial correlation in idiosyncratic errors
pbgtest(re.mod)
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel
## models
##
## data: y ~ x
## chisq = 5.7469, df = 10, p-value = 0.8361
## alternative hypothesis: serial correlation in idiosyncratic errors
#Is there heteroscedasticity across units? Report the standard deviation of
#the fixed effects residuals separately for each of the 50 states
by(data=fe.mod$residuals, INDICES=state$state,FUN=sd)
## state$state: 1
## [1] 11.81807
## --------------------------------------------------------
## state$state: 2
## [1] 13.89172
## --------------------------------------------------------
## state$state: 3
## [1] 15.4049
## --------------------------------------------------------
## state$state: 4
## [1] 12.51429
## --------------------------------------------------------
## state$state: 5
## [1] 17.57407
## --------------------------------------------------------
## state$state: 6
## [1] 11.97787
## --------------------------------------------------------
## state$state: 7
## [1] 16.49319
## --------------------------------------------------------
## state$state: 8
## [1] 10.86398
## --------------------------------------------------------
## state$state: 9
## [1] 14.66044
## --------------------------------------------------------
## state$state: 10
## [1] 15.02289
## --------------------------------------------------------
## state$state: 11
## [1] 15.12577
## --------------------------------------------------------
## state$state: 12
## [1] 16.64692
## --------------------------------------------------------
## state$state: 13
## [1] 16.59279
## --------------------------------------------------------
## state$state: 14
## [1] 12.14405
## --------------------------------------------------------
## state$state: 15
## [1] 14.34336
## --------------------------------------------------------
## state$state: 16
## [1] 16.13807
## --------------------------------------------------------
## state$state: 17
## [1] 10.88982
## --------------------------------------------------------
## state$state: 18
## [1] 16.20893
## --------------------------------------------------------
## state$state: 19
## [1] 11.50404
## --------------------------------------------------------
## state$state: 20
## [1] 16.14991
## --------------------------------------------------------
## state$state: 21
## [1] 11.24411
## --------------------------------------------------------
## state$state: 22
## [1] 14.53759
## --------------------------------------------------------
## state$state: 23
## [1] 16.11378
## --------------------------------------------------------
## state$state: 24
## [1] 14.48975
## --------------------------------------------------------
## state$state: 25
## [1] 11.10953
## --------------------------------------------------------
## state$state: 26
## [1] 12.7745
## --------------------------------------------------------
## state$state: 27
## [1] 14.9713
## --------------------------------------------------------
## state$state: 28
## [1] 16.78286
## --------------------------------------------------------
## state$state: 29
## [1] 10.24506
## --------------------------------------------------------
## state$state: 30
## [1] 10.06918
## --------------------------------------------------------
## state$state: 31
## [1] 13.33506
## --------------------------------------------------------
## state$state: 32
## [1] 14.03133
## --------------------------------------------------------
## state$state: 33
## [1] 12.48712
## --------------------------------------------------------
## state$state: 34
## [1] 13.17779
## --------------------------------------------------------
## state$state: 35
## [1] 9.262702
## --------------------------------------------------------
## state$state: 36
## [1] 19.28064
## --------------------------------------------------------
## state$state: 37
## [1] 13.92287
## --------------------------------------------------------
## state$state: 38
## [1] 11.73758
## --------------------------------------------------------
## state$state: 39
## [1] 16.87471
## --------------------------------------------------------
## state$state: 40
## [1] 10.13145
## --------------------------------------------------------
## state$state: 41
## [1] 15.64463
## --------------------------------------------------------
## state$state: 42
## [1] 9.173528
## --------------------------------------------------------
## state$state: 43
## [1] 14.43694
## --------------------------------------------------------
## state$state: 44
## [1] 13.77322
## --------------------------------------------------------
## state$state: 45
## [1] 12.89994
## --------------------------------------------------------
## state$state: 46
## [1] 14.90767
## --------------------------------------------------------
## state$state: 47
## [1] 13.03579
## --------------------------------------------------------
## state$state: 48
## [1] 16.4042
## --------------------------------------------------------
## state$state: 49
## [1] 14.40868
## --------------------------------------------------------
## state$state: 50
## [1] 13.5256
by(data=re.mod$residuals, INDICES=state$state,FUN=sd)
## state$state: 1
## [1] 11.82263
## --------------------------------------------------------
## state$state: 2
## [1] 13.88126
## --------------------------------------------------------
## state$state: 3
## [1] 15.35098
## --------------------------------------------------------
## state$state: 4
## [1] 12.5062
## --------------------------------------------------------
## state$state: 5
## [1] 17.6232
## --------------------------------------------------------
## state$state: 6
## [1] 11.9775
## --------------------------------------------------------
## state$state: 7
## [1] 16.53566
## --------------------------------------------------------
## state$state: 8
## [1] 10.83712
## --------------------------------------------------------
## state$state: 9
## [1] 14.71178
## --------------------------------------------------------
## state$state: 10
## [1] 15.01937
## --------------------------------------------------------
## state$state: 11
## [1] 15.13247
## --------------------------------------------------------
## state$state: 12
## [1] 16.67284
## --------------------------------------------------------
## state$state: 13
## [1] 16.59559
## --------------------------------------------------------
## state$state: 14
## [1] 12.19081
## --------------------------------------------------------
## state$state: 15
## [1] 14.30411
## --------------------------------------------------------
## state$state: 16
## [1] 16.15757
## --------------------------------------------------------
## state$state: 17
## [1] 10.91339
## --------------------------------------------------------
## state$state: 18
## [1] 16.2043
## --------------------------------------------------------
## state$state: 19
## [1] 11.5274
## --------------------------------------------------------
## state$state: 20
## [1] 16.13781
## --------------------------------------------------------
## state$state: 21
## [1] 11.23782
## --------------------------------------------------------
## state$state: 22
## [1] 14.48624
## --------------------------------------------------------
## state$state: 23
## [1] 16.15662
## --------------------------------------------------------
## state$state: 24
## [1] 14.43354
## --------------------------------------------------------
## state$state: 25
## [1] 11.08485
## --------------------------------------------------------
## state$state: 26
## [1] 12.75815
## --------------------------------------------------------
## state$state: 27
## [1] 14.95834
## --------------------------------------------------------
## state$state: 28
## [1] 16.82505
## --------------------------------------------------------
## state$state: 29
## [1] 10.21616
## --------------------------------------------------------
## state$state: 30
## [1] 10.04843
## --------------------------------------------------------
## state$state: 31
## [1] 13.31257
## --------------------------------------------------------
## state$state: 32
## [1] 14.01391
## --------------------------------------------------------
## state$state: 33
## [1] 12.52335
## --------------------------------------------------------
## state$state: 34
## [1] 13.16966
## --------------------------------------------------------
## state$state: 35
## [1] 9.204659
## --------------------------------------------------------
## state$state: 36
## [1] 19.26064
## --------------------------------------------------------
## state$state: 37
## [1] 13.88357
## --------------------------------------------------------
## state$state: 38
## [1] 11.78806
## --------------------------------------------------------
## state$state: 39
## [1] 16.83903
## --------------------------------------------------------
## state$state: 40
## [1] 10.15442
## --------------------------------------------------------
## state$state: 41
## [1] 15.64971
## --------------------------------------------------------
## state$state: 42
## [1] 9.129661
## --------------------------------------------------------
## state$state: 43
## [1] 14.4415
## --------------------------------------------------------
## state$state: 44
## [1] 13.75224
## --------------------------------------------------------
## state$state: 45
## [1] 12.93517
## --------------------------------------------------------
## state$state: 46
## [1] 14.88587
## --------------------------------------------------------
## state$state: 47
## [1] 13.05417
## --------------------------------------------------------
## state$state: 48
## [1] 16.42224
## --------------------------------------------------------
## state$state: 49
## [1] 14.43068
## --------------------------------------------------------
## state$state: 50
## [1] 13.56733
#Running a random effects model that specifies first-order serial correlation:
#nlme::lme with a random intercept for each state and an AR(1) error process
#over time within each state (corAR1's first argument, 0, is the starting
#value for the autocorrelation parameter Phi)
mod.lme <- lme(y ~ x, data = state, random = ~1 | state,
correlation = corAR1(0, form = ~time | state))
summary(mod.lme)
## Linear mixed-effects model fit by REML
## Data: state
## AIC BIC logLik
## 4078.056 4099.109 -2034.028
##
## Random effects:
## Formula: ~1 | state
## (Intercept) Residual
## StdDev: 1.930501 13.97998
##
## Correlation Structure: AR(1)
## Formula: ~time | state
## Parameter estimate(s):
## Phi
## -0.03048009
## Fixed effects: y ~ x
## Value Std.Error DF t-value p-value
## (Intercept) 50.77004 0.6666945 449 76.15188 0
## x 1.96514 0.0224076 449 87.69965 0
## Correlation:
## (Intr)
## x 0
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.98437665 -0.82812706 -0.04295284 0.92107232 1.78051783
##
## Number of Observations: 500
## Number of Groups: 50
#Running a fixed effects model that accounts for first-order serial correlation:
#state dummies (as.factor(state)) supply the fixed effects, and gls estimates
#an AR(1) error structure ordered by time within each state
fe.gls.model <- gls(y~x+as.factor(state),
correlation=corAR1(0, form=~time|state),
data=state)
summary(fe.gls.model)
## Generalized least squares fit by REML
## Model: y ~ x + as.factor(state)
## Data: state
## AIC BIC logLik
## 3879.1 4096.772 -1886.55
##
## Correlation Structure: AR(1)
## Formula: ~time | state
## Parameter estimate(s):
## Phi
## -0.03326118
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 38.51014 4.303892 8.94775 0.0000
## x 1.96878 0.023354 84.30015 0.0000
## as.factor(state)2 15.21545 6.087978 2.49926 0.0128
## as.factor(state)3 6.10203 6.082887 1.00315 0.3163
## as.factor(state)4 12.39929 6.068969 2.04306 0.0416
## as.factor(state)5 9.12632 6.082922 1.50032 0.1342
## as.factor(state)6 12.93973 6.071881 2.13109 0.0336
## as.factor(state)7 16.78769 6.097093 2.75339 0.0061
## as.factor(state)8 18.67235 6.110262 3.05590 0.0024
## as.factor(state)9 8.04805 6.067793 1.32636 0.1854
## as.factor(state)10 11.31959 6.069784 1.86491 0.0628
## as.factor(state)11 14.30927 6.082237 2.35263 0.0191
## as.factor(state)12 15.77289 6.072210 2.59755 0.0097
## as.factor(state)13 9.53445 6.077853 1.56872 0.1174
## as.factor(state)14 10.23571 6.067146 1.68707 0.0923
## as.factor(state)15 20.36892 6.075082 3.35286 0.0009
## as.factor(state)16 15.80246 6.077278 2.60025 0.0096
## as.factor(state)17 18.18053 6.087073 2.98674 0.0030
## as.factor(state)18 12.11560 6.095337 1.98768 0.0475
## as.factor(state)19 14.28269 6.085891 2.34685 0.0194
## as.factor(state)20 11.02179 6.066479 1.81684 0.0699
## as.factor(state)21 13.82819 6.066234 2.27953 0.0231
## as.factor(state)22 15.94378 6.067758 2.62762 0.0089
## as.factor(state)23 8.41616 6.099328 1.37985 0.1683
## as.factor(state)24 5.99225 6.123529 0.97856 0.3283
## as.factor(state)25 12.83281 6.086141 2.10853 0.0355
## as.factor(state)26 14.58066 6.082256 2.39725 0.0169
## as.factor(state)27 7.82404 6.066380 1.28974 0.1978
## as.factor(state)28 17.02825 6.070942 2.80488 0.0053
## as.factor(state)29 11.16618 6.076695 1.83754 0.0668
## as.factor(state)30 14.09157 6.067493 2.32247 0.0207
## as.factor(state)31 17.44543 6.066947 2.87549 0.0042
## as.factor(state)32 15.10688 6.075537 2.48651 0.0133
## as.factor(state)33 10.82480 6.094083 1.77628 0.0764
## as.factor(state)34 5.66005 6.074270 0.93181 0.3519
## as.factor(state)35 16.98846 6.073798 2.79701 0.0054
## as.factor(state)36 11.23724 6.082011 1.84762 0.0653
## as.factor(state)37 10.71316 6.102103 1.75565 0.0798
## as.factor(state)38 6.39052 6.070020 1.05280 0.2930
## as.factor(state)39 13.34745 6.069346 2.19916 0.0284
## as.factor(state)40 21.82623 6.081142 3.58917 0.0004
## as.factor(state)41 19.56149 6.069372 3.22298 0.0014
## as.factor(state)42 15.34824 6.078588 2.52497 0.0119
## as.factor(state)43 4.43297 6.070927 0.73020 0.4657
## as.factor(state)44 8.72527 6.081059 1.43483 0.1520
## as.factor(state)45 3.83766 6.075919 0.63162 0.5280
## as.factor(state)46 6.90064 6.068853 1.13706 0.2561
## as.factor(state)47 11.07919 6.105142 1.81473 0.0702
## as.factor(state)48 15.43003 6.082025 2.53699 0.0115
## as.factor(state)49 7.11239 6.086309 1.16859 0.2432
## as.factor(state)50 17.09450 6.072454 2.81509 0.0051
##
## Correlation:
## (Intr) x as.()2 as.()3 as.()4 as.()5 as.()6 as.()7
## x -0.082
## as.factor(state)2 -0.709 0.085
## as.factor(state)3 -0.709 0.074 0.503
## as.factor(state)4 -0.707 0.030 0.501 0.501
## as.factor(state)5 -0.709 0.074 0.503 0.503 0.501
## as.factor(state)6 -0.708 0.043 0.501 0.501 0.501 0.501
## as.factor(state)7 -0.709 0.101 0.504 0.504 0.500 0.504 0.501
## as.factor(state)8 -0.709 0.120 0.505 0.504 0.500 0.504 0.501 0.506
## as.factor(state)9 -0.706 0.023 0.500 0.500 0.500 0.500 0.500 0.500
## as.factor(state)10 -0.707 0.034 0.501 0.501 0.501 0.501 0.501 0.501
## as.factor(state)11 -0.709 0.073 0.503 0.503 0.501 0.503 0.501 0.503
## as.factor(state)12 -0.708 0.045 0.501 0.501 0.501 0.501 0.501 0.501
## as.factor(state)13 -0.708 0.062 0.502 0.502 0.501 0.502 0.501 0.503
## as.factor(state)14 -0.706 0.018 0.500 0.500 0.500 0.500 0.500 0.499
## as.factor(state)15 -0.708 0.054 0.502 0.502 0.501 0.502 0.501 0.502
## as.factor(state)16 -0.708 0.060 0.502 0.502 0.501 0.502 0.501 0.503
## as.factor(state)17 -0.709 0.083 0.503 0.503 0.501 0.503 0.501 0.504
## as.factor(state)18 -0.709 0.098 0.504 0.503 0.500 0.503 0.501 0.505
## as.factor(state)19 -0.709 0.080 0.503 0.503 0.501 0.503 0.501 0.504
## as.factor(state)20 -0.706 0.010 0.499 0.499 0.500 0.499 0.500 0.498
## as.factor(state)21 -0.704 -0.004 0.498 0.498 0.500 0.498 0.499 0.497
## as.factor(state)22 -0.706 0.023 0.500 0.500 0.500 0.500 0.500 0.500
## as.factor(state)23 -0.709 0.104 0.504 0.504 0.500 0.504 0.501 0.505
## as.factor(state)24 -0.709 0.137 0.505 0.504 0.499 0.504 0.501 0.507
## as.factor(state)25 -0.709 0.081 0.503 0.503 0.501 0.503 0.501 0.504
## as.factor(state)26 -0.709 0.073 0.503 0.503 0.501 0.503 0.501 0.503
## as.factor(state)27 -0.705 0.008 0.499 0.499 0.500 0.499 0.500 0.498
## as.factor(state)28 -0.707 0.040 0.501 0.501 0.501 0.501 0.501 0.501
## as.factor(state)29 -0.708 0.059 0.502 0.502 0.501 0.502 0.501 0.503
## as.factor(state)30 -0.706 0.021 0.500 0.500 0.500 0.500 0.500 0.499
## as.factor(state)31 -0.706 0.016 0.499 0.500 0.500 0.500 0.500 0.499
## as.factor(state)32 -0.708 0.055 0.502 0.502 0.501 0.502 0.501 0.502
## as.factor(state)33 -0.709 0.096 0.504 0.503 0.500 0.503 0.501 0.505
## as.factor(state)34 -0.708 0.052 0.502 0.502 0.501 0.502 0.501 0.502
## as.factor(state)35 -0.708 0.050 0.502 0.502 0.501 0.502 0.501 0.502
## as.factor(state)36 -0.709 0.072 0.503 0.503 0.501 0.503 0.501 0.503
## as.factor(state)37 -0.709 0.108 0.504 0.504 0.500 0.504 0.501 0.505
## as.factor(state)38 -0.707 0.036 0.501 0.501 0.501 0.501 0.501 0.501
## as.factor(state)39 -0.707 0.032 0.501 0.501 0.500 0.501 0.501 0.500
## as.factor(state)40 -0.709 0.070 0.503 0.503 0.501 0.503 0.501 0.503
## as.factor(state)41 -0.707 0.032 0.501 0.501 0.500 0.501 0.501 0.500
## as.factor(state)42 -0.709 0.064 0.503 0.502 0.501 0.502 0.501 0.503
## as.factor(state)43 -0.707 0.040 0.501 0.501 0.501 0.501 0.501 0.501
## as.factor(state)44 -0.709 0.070 0.503 0.503 0.501 0.503 0.501 0.503
## as.factor(state)45 -0.708 0.057 0.502 0.502 0.501 0.502 0.501 0.502
## as.factor(state)46 -0.707 0.030 0.500 0.501 0.500 0.501 0.501 0.500
## as.factor(state)47 -0.709 0.113 0.505 0.504 0.500 0.504 0.501 0.506
## as.factor(state)48 -0.709 0.072 0.503 0.503 0.501 0.503 0.501 0.503
## as.factor(state)49 -0.709 0.081 0.503 0.503 0.501 0.503 0.501 0.504
## as.factor(state)50 -0.708 0.045 0.502 0.501 0.501 0.501 0.501 0.502
## as.()8 as.()9 a.()10 a.()11 a.()12 a.()13 a.()14 a.()15
## x
## as.factor(state)2
## as.factor(state)3
## as.factor(state)4
## as.factor(state)5
## as.factor(state)6
## as.factor(state)7
## as.factor(state)8
## as.factor(state)9 0.499
## as.factor(state)10 0.500 0.500
## as.factor(state)11 0.504 0.500 0.501
## as.factor(state)12 0.501 0.500 0.501 0.501
## as.factor(state)13 0.503 0.500 0.501 0.502 0.501
## as.factor(state)14 0.498 0.500 0.500 0.500 0.500 0.500
## as.factor(state)15 0.502 0.500 0.501 0.502 0.501 0.502 0.500
## as.factor(state)16 0.503 0.500 0.501 0.502 0.501 0.502 0.500 0.502
## as.factor(state)17 0.505 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)18 0.506 0.500 0.501 0.503 0.501 0.503 0.499 0.502
## as.factor(state)19 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)20 0.498 0.500 0.500 0.499 0.500 0.500 0.500 0.500
## as.factor(state)21 0.496 0.500 0.500 0.498 0.499 0.499 0.500 0.499
## as.factor(state)22 0.499 0.500 0.500 0.500 0.500 0.500 0.500 0.500
## as.factor(state)23 0.506 0.500 0.501 0.504 0.501 0.503 0.499 0.502
## as.factor(state)24 0.508 0.498 0.500 0.504 0.501 0.503 0.498 0.502
## as.factor(state)25 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)26 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)27 0.497 0.500 0.500 0.499 0.500 0.500 0.500 0.500
## as.factor(state)28 0.501 0.500 0.501 0.501 0.501 0.501 0.500 0.501
## as.factor(state)29 0.503 0.500 0.501 0.502 0.501 0.502 0.500 0.502
## as.factor(state)30 0.499 0.500 0.500 0.500 0.500 0.500 0.500 0.500
## as.factor(state)31 0.498 0.500 0.500 0.500 0.500 0.500 0.500 0.500
## as.factor(state)32 0.502 0.500 0.501 0.502 0.501 0.502 0.500 0.502
## as.factor(state)33 0.506 0.500 0.501 0.503 0.501 0.503 0.499 0.502
## as.factor(state)34 0.502 0.500 0.501 0.502 0.501 0.502 0.500 0.501
## as.factor(state)35 0.502 0.500 0.501 0.502 0.501 0.502 0.500 0.501
## as.factor(state)36 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)37 0.506 0.499 0.500 0.504 0.501 0.503 0.499 0.502
## as.factor(state)38 0.500 0.500 0.501 0.501 0.501 0.501 0.500 0.501
## as.factor(state)39 0.500 0.500 0.501 0.501 0.501 0.501 0.500 0.501
## as.factor(state)40 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)41 0.500 0.500 0.501 0.501 0.501 0.501 0.500 0.501
## as.factor(state)42 0.503 0.500 0.501 0.502 0.501 0.502 0.500 0.502
## as.factor(state)43 0.501 0.500 0.501 0.501 0.501 0.501 0.500 0.501
## as.factor(state)44 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)45 0.502 0.500 0.501 0.502 0.501 0.502 0.500 0.502
## as.factor(state)46 0.500 0.500 0.501 0.501 0.501 0.501 0.500 0.501
## as.factor(state)47 0.507 0.499 0.500 0.504 0.501 0.503 0.499 0.502
## as.factor(state)48 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)49 0.504 0.500 0.501 0.503 0.501 0.502 0.500 0.502
## as.factor(state)50 0.501 0.500 0.501 0.501 0.501 0.501 0.500 0.501
## a.()16 a.()17 a.()18 a.()19 a.()20 a.()21 a.()22 a.()23
## x
## as.factor(state)2
## as.factor(state)3
## as.factor(state)4
## as.factor(state)5
## as.factor(state)6
## as.factor(state)7
## as.factor(state)8
## as.factor(state)9
## as.factor(state)10
## as.factor(state)11
## as.factor(state)12
## as.factor(state)13
## as.factor(state)14
## as.factor(state)15
## as.factor(state)16
## as.factor(state)17 0.502
## as.factor(state)18 0.503 0.504
## as.factor(state)19 0.502 0.503 0.504
## as.factor(state)20 0.500 0.499 0.499 0.499
## as.factor(state)21 0.499 0.498 0.497 0.498 0.500
## as.factor(state)22 0.500 0.500 0.500 0.500 0.500 0.500
## as.factor(state)23 0.503 0.504 0.505 0.504 0.498 0.497 0.500
## as.factor(state)24 0.503 0.505 0.506 0.505 0.497 0.495 0.498 0.507
## as.factor(state)25 0.502 0.503 0.504 0.503 0.499 0.498 0.500 0.504
## as.factor(state)26 0.502 0.503 0.503 0.503 0.499 0.498 0.500 0.504
## as.factor(state)27 0.500 0.499 0.498 0.499 0.500 0.500 0.500 0.498
## as.factor(state)28 0.501 0.501 0.501 0.501 0.500 0.499 0.500 0.501
## as.factor(state)29 0.502 0.502 0.502 0.502 0.500 0.499 0.500 0.503
## as.factor(state)30 0.500 0.500 0.500 0.500 0.500 0.500 0.500 0.499
## as.factor(state)31 0.500 0.500 0.499 0.500 0.500 0.500 0.500 0.499
## as.factor(state)32 0.502 0.502 0.502 0.502 0.500 0.499 0.500 0.502
## as.factor(state)33 0.503 0.504 0.505 0.504 0.499 0.497 0.500 0.505
## as.factor(state)34 0.502 0.502 0.502 0.502 0.500 0.499 0.500 0.502
## as.factor(state)35 0.501 0.502 0.502 0.502 0.500 0.499 0.500 0.502
## as.factor(state)36 0.502 0.503 0.503 0.503 0.499 0.498 0.500 0.503
## as.factor(state)37 0.503 0.504 0.505 0.504 0.498 0.497 0.499 0.506
## as.factor(state)38 0.501 0.501 0.501 0.501 0.500 0.500 0.500 0.501
## as.factor(state)39 0.501 0.501 0.501 0.501 0.500 0.500 0.500 0.500
## as.factor(state)40 0.502 0.503 0.503 0.503 0.499 0.498 0.500 0.503
## as.factor(state)41 0.501 0.501 0.501 0.501 0.500 0.500 0.500 0.500
## as.factor(state)42 0.502 0.503 0.503 0.502 0.500 0.499 0.500 0.503
## as.factor(state)43 0.501 0.501 0.501 0.501 0.500 0.499 0.500 0.501
## as.factor(state)44 0.502 0.503 0.503 0.503 0.499 0.498 0.500 0.503
## as.factor(state)45 0.502 0.502 0.502 0.502 0.500 0.499 0.500 0.502
## as.factor(state)46 0.501 0.501 0.500 0.501 0.500 0.500 0.500 0.500
## as.factor(state)47 0.503 0.504 0.505 0.504 0.498 0.496 0.499 0.506
## as.factor(state)48 0.502 0.503 0.503 0.503 0.499 0.498 0.500 0.503
## as.factor(state)49 0.502 0.503 0.504 0.503 0.499 0.498 0.500 0.504
## as.factor(state)50 0.501 0.502 0.502 0.502 0.500 0.499 0.500 0.501
## a.()24 a.()25 a.()26 a.()27 a.()28 a.()29 a.()30 a.()31
## x
## as.factor(state)2
## as.factor(state)3
## as.factor(state)4
## as.factor(state)5
## as.factor(state)6
## as.factor(state)7
## as.factor(state)8
## as.factor(state)9
## as.factor(state)10
## as.factor(state)11
## as.factor(state)12
## as.factor(state)13
## as.factor(state)14
## as.factor(state)15
## as.factor(state)16
## as.factor(state)17
## as.factor(state)18
## as.factor(state)19
## as.factor(state)20
## as.factor(state)21
## as.factor(state)22
## as.factor(state)23
## as.factor(state)24
## as.factor(state)25 0.505
## as.factor(state)26 0.504 0.503
## as.factor(state)27 0.496 0.499 0.499
## as.factor(state)28 0.500 0.501 0.501 0.500
## as.factor(state)29 0.502 0.502 0.502 0.500 0.501
## as.factor(state)30 0.498 0.500 0.500 0.500 0.500 0.500
## as.factor(state)31 0.497 0.500 0.500 0.500 0.500 0.500 0.500
## as.factor(state)32 0.502 0.502 0.502 0.500 0.501 0.502 0.500 0.500
## as.factor(state)33 0.506 0.504 0.503 0.498 0.501 0.502 0.500 0.499
## as.factor(state)34 0.502 0.502 0.502 0.500 0.501 0.502 0.500 0.500
## as.factor(state)35 0.502 0.502 0.502 0.500 0.501 0.501 0.500 0.500
## as.factor(state)36 0.504 0.503 0.503 0.499 0.501 0.502 0.500 0.500
## as.factor(state)37 0.507 0.504 0.504 0.498 0.501 0.503 0.499 0.499
## as.factor(state)38 0.500 0.501 0.501 0.500 0.501 0.501 0.500 0.500
## as.factor(state)39 0.499 0.501 0.501 0.500 0.501 0.501 0.500 0.500
## as.factor(state)40 0.504 0.503 0.503 0.499 0.501 0.502 0.500 0.500
## as.factor(state)41 0.499 0.501 0.501 0.500 0.501 0.501 0.500 0.500
## as.factor(state)42 0.503 0.503 0.502 0.499 0.501 0.502 0.500 0.500
## as.factor(state)43 0.500 0.501 0.501 0.500 0.501 0.501 0.500 0.500
## as.factor(state)44 0.504 0.503 0.503 0.499 0.501 0.502 0.500 0.500
## as.factor(state)45 0.502 0.502 0.502 0.500 0.501 0.502 0.500 0.500
## as.factor(state)46 0.499 0.501 0.501 0.500 0.501 0.501 0.500 0.500
## as.factor(state)47 0.508 0.504 0.504 0.498 0.501 0.503 0.499 0.499
## as.factor(state)48 0.504 0.503 0.503 0.499 0.501 0.502 0.500 0.500
## as.factor(state)49 0.505 0.503 0.503 0.499 0.501 0.502 0.500 0.500
## as.factor(state)50 0.501 0.502 0.501 0.500 0.501 0.501 0.500 0.500
## a.()32 a.()33 a.()34 a.()35 a.()36 a.()37 a.()38 a.()39
## x
## as.factor(state)2
## as.factor(state)3
## as.factor(state)4
## as.factor(state)5
## as.factor(state)6
## as.factor(state)7
## as.factor(state)8
## as.factor(state)9
## as.factor(state)10
## as.factor(state)11
## as.factor(state)12
## as.factor(state)13
## as.factor(state)14
## as.factor(state)15
## as.factor(state)16
## as.factor(state)17
## as.factor(state)18
## as.factor(state)19
## as.factor(state)20
## as.factor(state)21
## as.factor(state)22
## as.factor(state)23
## as.factor(state)24
## as.factor(state)25
## as.factor(state)26
## as.factor(state)27
## as.factor(state)28
## as.factor(state)29
## as.factor(state)30
## as.factor(state)31
## as.factor(state)32
## as.factor(state)33 0.502
## as.factor(state)34 0.501 0.502
## as.factor(state)35 0.501 0.502 0.501
## as.factor(state)36 0.502 0.503 0.502 0.502
## as.factor(state)37 0.502 0.505 0.502 0.502 0.504
## as.factor(state)38 0.501 0.501 0.501 0.501 0.501 0.501
## as.factor(state)39 0.501 0.501 0.501 0.501 0.501 0.500 0.501
## as.factor(state)40 0.502 0.503 0.502 0.502 0.503 0.503 0.501 0.501
## as.factor(state)41 0.501 0.501 0.501 0.501 0.501 0.500 0.501 0.501
## as.factor(state)42 0.502 0.503 0.502 0.502 0.502 0.503 0.501 0.501
## as.factor(state)43 0.501 0.501 0.501 0.501 0.501 0.501 0.501 0.501
## as.factor(state)44 0.502 0.503 0.502 0.502 0.503 0.503 0.501 0.501
## as.factor(state)45 0.502 0.502 0.501 0.501 0.502 0.502 0.501 0.501
## as.factor(state)46 0.501 0.500 0.501 0.501 0.501 0.500 0.501 0.500
## as.factor(state)47 0.502 0.505 0.502 0.502 0.504 0.506 0.501 0.500
## as.factor(state)48 0.502 0.503 0.502 0.502 0.503 0.504 0.501 0.501
## as.factor(state)49 0.502 0.504 0.502 0.502 0.503 0.504 0.501 0.501
## as.factor(state)50 0.501 0.502 0.501 0.501 0.501 0.501 0.501 0.501
## a.()40 a.()41 a.()42 a.()43 a.()44 a.()45 a.()46 a.()47
## x
## as.factor(state)2
## as.factor(state)3
## as.factor(state)4
## as.factor(state)5
## as.factor(state)6
## as.factor(state)7
## as.factor(state)8
## as.factor(state)9
## as.factor(state)10
## as.factor(state)11
## as.factor(state)12
## as.factor(state)13
## as.factor(state)14
## as.factor(state)15
## as.factor(state)16
## as.factor(state)17
## as.factor(state)18
## as.factor(state)19
## as.factor(state)20
## as.factor(state)21
## as.factor(state)22
## as.factor(state)23
## as.factor(state)24
## as.factor(state)25
## as.factor(state)26
## as.factor(state)27
## as.factor(state)28
## as.factor(state)29
## as.factor(state)30
## as.factor(state)31
## as.factor(state)32
## as.factor(state)33
## as.factor(state)34
## as.factor(state)35
## as.factor(state)36
## as.factor(state)37
## as.factor(state)38
## as.factor(state)39
## as.factor(state)40
## as.factor(state)41 0.501
## as.factor(state)42 0.502 0.501
## as.factor(state)43 0.501 0.501 0.501
## as.factor(state)44 0.502 0.501 0.502 0.501
## as.factor(state)45 0.502 0.501 0.502 0.501 0.502
## as.factor(state)46 0.501 0.500 0.501 0.501 0.501 0.501
## as.factor(state)47 0.503 0.500 0.503 0.501 0.503 0.502 0.500
## as.factor(state)48 0.503 0.501 0.502 0.501 0.503 0.502 0.501 0.504
## as.factor(state)49 0.503 0.501 0.503 0.501 0.503 0.502 0.501 0.504
## as.factor(state)50 0.501 0.501 0.501 0.501 0.501 0.501 0.501 0.501
## a.()48 a.()49
## x
## as.factor(state)2
## as.factor(state)3
## as.factor(state)4
## as.factor(state)5
## as.factor(state)6
## as.factor(state)7
## as.factor(state)8
## as.factor(state)9
## as.factor(state)10
## as.factor(state)11
## as.factor(state)12
## as.factor(state)13
## as.factor(state)14
## as.factor(state)15
## as.factor(state)16
## as.factor(state)17
## as.factor(state)18
## as.factor(state)19
## as.factor(state)20
## as.factor(state)21
## as.factor(state)22
## as.factor(state)23
## as.factor(state)24
## as.factor(state)25
## as.factor(state)26
## as.factor(state)27
## as.factor(state)28
## as.factor(state)29
## as.factor(state)30
## as.factor(state)31
## as.factor(state)32
## as.factor(state)33
## as.factor(state)34
## as.factor(state)35
## as.factor(state)36
## as.factor(state)37
## as.factor(state)38
## as.factor(state)39
## as.factor(state)40
## as.factor(state)41
## as.factor(state)42
## as.factor(state)43
## as.factor(state)44
## as.factor(state)45
## as.factor(state)46
## as.factor(state)47
## as.factor(state)48
## as.factor(state)49 0.503
## as.factor(state)50 0.501 0.502
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.25641574 -0.74675763 -0.05429884 0.77045194 2.28249545
##
## Residual standard error: 13.97811
## Degrees of freedom: 500 total; 449 residual
pggls is a function for the estimation of linear panel models by general feasible generalized least squares, either with or without fixed effects. General FGLS is based on a two-step estimation process: first a model is estimated by OLS (pooling), fixed effects (within) or first differences (fd), then its residuals are used to estimate an error covariance matrix for use in a feasible-GLS analysis. This framework allows the error covariance structure inside every group (if effect="individual", else symmetric) of observations to be fully unrestricted and is therefore robust against any type of intragroup heteroskedasticity and serial correlation. Conversely, this structure is assumed identical across groups and thus general FGLS estimation is inefficient under groupwise heteroskedasticity. Note also that this method requires estimation of T(T+1)/2 variance parameters, thus efficiency requires N > > T (if effect="individual", else the opposite). The model="random" and model="pooling" arguments both produce an unrestricted FGLS model as in Wooldridge, Ch. 10, although the former is deprecated and included only for retro–compatibility reasons. If model="within" (the default) then a FEGLS (fixed effects GLS, see ibid.) is estimated; if model="fd" a FDGLS (first-difference GLS).
#Running a model that allows for a serial correlation structure and heteroscedasticity within panel
#(unrestricted FGLS via plm::pggls)
#NOTE(review): model="random" is a deprecated alias in pggls() and triggered
#the warning preserved below; model="pooling" requests the identical
#unrestricted FGLS fit without the deprecation warning
mod.pggls <- plm::pggls(y~x, data=state,
index=c("state","time"), model="pooling")
## Warning emitted by the old model="random" alias:
## 'random' argument to pggls() has been renamed as 'pooling'
summary(mod.pggls)
## Random effects model
##
## Call:
## plm::pggls(formula = y ~ x, data = state, model = "random", index = c("state",
## "time"))
##
## Balanced Panel: n = 50, T = 10, N = 500
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -26.73069 -11.89724 -0.27186 0.09875 12.98997 24.66032
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 50.670251 0.571413 88.675 < 0.00000000000000022 ***
## x 1.969652 0.020175 97.631 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Total Sum of Squares: 1619500
## Residual Sum of Squares: 99177
## Multiple R-squared: 0.93876
#Running a fixed effects model and adding panel corrected standard errors
#LSDV fixed effects: one dummy variable per state via as.factor()
fe.lm.model <- lm(y~x+as.factor(state), data=state)
#Beck-Katz panel-corrected standard errors computed from the OLS fit
model.pcse<-pcse::pcse(fe.lm.model, groupN=state$state, groupT=state$time)
summary(model.pcse)
##
## Results:
##
## Estimate PCSE t value Pr(>|t|)
## (Intercept) 38.505011 3.55907749 10.8188180 2.128884e-24
## x 1.969086 0.02065518 95.3313354 4.332198e-300
## as.factor(state)2 15.093063 5.51996795 2.7342664 6.499183e-03
## as.factor(state)3 6.047563 6.49432565 0.9312073 3.522468e-01
## as.factor(state)4 12.334444 4.31250492 2.8601576 4.431746e-03
## as.factor(state)5 9.088581 5.42321933 1.6758646 9.446088e-02
## as.factor(state)6 12.887020 3.94443903 3.2671363 1.169729e-03
## as.factor(state)7 16.809425 5.20910112 3.2269339 1.342794e-03
## as.factor(state)8 18.706544 5.83644855 3.2051245 1.446316e-03
## as.factor(state)9 8.087048 3.84271245 2.1045155 3.588875e-02
## as.factor(state)10 11.274579 6.84842895 1.6463015 1.004014e-01
## as.factor(state)11 14.358481 6.54058436 2.1952903 2.865388e-02
## as.factor(state)12 15.713256 7.32555050 2.1449932 3.248928e-02
## as.factor(state)13 9.465211 7.18857771 1.3167015 1.886104e-01
## as.factor(state)14 10.229855 6.25030155 1.6366978 1.023943e-01
## as.factor(state)15 20.404478 6.02422296 3.3870722 7.686010e-04
## as.factor(state)16 15.849900 5.65429941 2.8031589 5.279629e-03
## as.factor(state)17 18.154672 5.03196662 3.6078682 3.434189e-04
## as.factor(state)18 12.151837 6.97133310 1.7431152 8.199803e-02
## as.factor(state)19 14.300434 4.13258091 3.4604123 5.908967e-04
## as.factor(state)20 11.019150 4.47424689 2.4627943 1.416024e-02
## as.factor(state)21 13.766580 5.42266099 2.5387130 1.146274e-02
## as.factor(state)22 15.883072 4.21217712 3.7707512 1.845164e-04
## as.factor(state)23 8.351756 6.76223224 1.2350590 2.174544e-01
## as.factor(state)24 6.039102 6.11376657 0.9877874 3.237888e-01
## as.factor(state)25 12.809281 5.78944141 2.2125245 2.743290e-02
## as.factor(state)26 14.599543 4.63303650 3.1511824 1.734874e-03
## as.factor(state)27 7.816195 4.46469606 1.7506669 8.068625e-02
## as.factor(state)28 16.990049 7.38705312 2.2999766 2.190738e-02
## as.factor(state)29 11.107793 3.18189762 3.4909335 5.289274e-04
## as.factor(state)30 14.157462 4.62380734 3.0618625 2.331717e-03
## as.factor(state)31 17.503316 5.77288179 3.0319893 2.570104e-03
## as.factor(state)32 15.085383 6.17979948 2.4410797 1.502876e-02
## as.factor(state)33 10.766089 4.28800292 2.5107467 1.239798e-02
## as.factor(state)34 5.673854 5.46304993 1.0385872 2.995557e-01
## as.factor(state)35 17.029900 5.23530361 3.2528964 1.228515e-03
## as.factor(state)36 11.254995 5.62186374 2.0020042 4.588531e-02
## as.factor(state)37 10.731645 5.38830270 1.9916559 4.701416e-02
## as.factor(state)38 6.400210 5.17350942 1.2371119 2.166922e-01
## as.factor(state)39 13.346846 5.54482367 2.4070821 1.648323e-02
## as.factor(state)40 21.817332 4.66211220 4.6797098 3.808955e-06
## as.factor(state)41 19.631490 5.80715752 3.3805678 7.865578e-04
## as.factor(state)42 15.394579 3.22596032 4.7720920 2.470153e-06
## as.factor(state)43 4.502578 5.87845353 0.7659460 4.441109e-01
## as.factor(state)44 8.788492 4.95970394 1.7719792 7.707614e-02
## as.factor(state)45 3.917253 5.82686939 0.6722741 5.017550e-01
## as.factor(state)46 6.922103 4.88756789 1.4162674 1.573906e-01
## as.factor(state)47 11.188698 3.32536244 3.3646552 8.321487e-04
## as.factor(state)48 15.500484 6.25688867 2.4773469 1.360333e-02
## as.factor(state)49 7.193272 5.32479091 1.3509023 1.774071e-01
## as.factor(state)50 17.054444 4.23306715 4.0288622 6.581459e-05
##
## ---------------------------------------------
##
## # Valid Obs = 500; # Missing Obs = 0; Degrees of Freedom = 449.
### HOW TO RUN A PANEL MODEL WITH A LAGGED DEPENDENT VARIABLE (IF ONE ISN'T ALREADY CODED) ###
#coerce the index variables to numeric so the lag arithmetic below works
state$time <- as.numeric(state$time)
state$state <- as.numeric(state$state)
#timelag points each observation at the previous period
state$timelag <- state$time - 1
#deps holds each state-period's y (and yar1) so it can be merged back in
#as the lagged value of the following period
deps <- as.data.frame(cbind(state$state, state$time, state$y, state$yar1))
names(deps) <- c("state", "time", "yLag", "yar1lag")
#merge deps' period t onto the original data's period t+1 (matched via timelag);
#the first period of every state drops out of state2 (N = 450 in the output below)
state2 <- merge(x=state, y=deps,
by.x=c("timelag", "state"),
by.y=c("time", "state"))
#cbind(state2$state, state2$time, state2$y, state2$yLag)
#OLS with LDV
mod.lagged <- lm(y~yLag+x, data=state2); summary(mod.lagged)
##
## Call:
## lm(formula = y ~ yLag + x, data = state2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.3371 -12.2458 -0.1952 13.3304 24.5674
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.21601 0.90315 56.71 <2e-16 ***
## yLag -0.00426 0.01184 -0.36 0.719
## x 1.96293 0.02431 80.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.32 on 447 degrees of freedom
## Multiple R-squared: 0.9359, Adjusted R-squared: 0.9356
## F-statistic: 3264 on 2 and 447 DF, p-value: < 0.00000000000000022
#panel-corrected standard errors for the lagged-DV OLS model
#(one period per state is lost to lagging, hence 450 valid observations)
lag.pcse<-pcse(mod.lagged,groupN=state2$state,groupT=state2$time)
summary(lag.pcse)
##
## Results:
##
## Estimate PCSE t value Pr(>|t|)
## (Intercept) 51.21601275 0.92731493 55.230441 8.369615e-202
## yLag -0.00426034 0.01335459 -0.319017 7.498626e-01
## x 1.96292640 0.02404533 81.634405 1.060222e-270
##
## ---------------------------------------------
##
## # Valid Obs = 450; # Missing Obs = 0; Degrees of Freedom = 447.
### INTERPRETING DYNAMICS ###
# PULSE INPUT
# Trace the response of y to a one-period shock in x, using the estimated
# lagged-DV coefficient and the input coefficient from mod.lagged.
lag.coef <- coef(mod.lagged)[2]
#lag.coef<-.5 #hypothetical alternative, to illustrate a positive spillover
input.coef <- coef(mod.lagged)[3]
times <- 0:10
# the effect at horizon t decays geometrically: input * lag^t
pred.pulse <- input.coef * lag.coef^times
plot(x = c(-1, times), y = c(0, pred.pulse), type = 'l')
# STEP INPUT
# a permanent change in x accumulates the pulse effects over time
pred.step <- cumsum(pred.pulse)
plot(x = c(-1, times), y = c(0, pred.step), type = 'l')
###UNIT ROOT TEST###
#purtest implements several testing procedures that have been
#proposed to test unit root hypotheses with panel data.
#purtest(y ~ 1, data = state, index = "state", pmax=8, test = "levinlin")
#purtest(y ~ 1, data = state, index = "state", pmax=8, test = "ips")
#purtest(y ~ 1, data = state, index = "state", pmax=8, test = "madwu")
#Hadri test: null is stationarity of all series; the printed alternative
#below is that at least one series has a unit root
plm::purtest(y ~ 1, data = state, index = "state", pmax=8, test = "hadri")
## Warning in pdata.frame(data, index): column 'time' overwritten by time
## index
##
## Hadri Test (ex. var.: Individual Intercepts) (Heterosked.
## Consistent)
##
## data: y ~ 1
## z = 1.0373, p-value = 0.1498
## alternative hypothesis: at least one series has a unit root
#################################################################
#clean up
#NOTE: rm(list=ls()) wipes the workspace; acceptable in a teaching
#script, but avoid it in reusable code
rm(list=ls())
#Load libraries
library(geepack)
library(reshape)
library(nlme)
library(car)
library(MuMIn)
library(multgee)
## Loading required package: gnm
##
## Attaching package: 'gnm'
## The following object is masked from 'package:faraway':
##
## wheat
## The following object is masked from 'package:lattice':
##
## barley
## Loading required package: VGAM
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:MuMIn':
##
## AICc
## The following object is masked from 'package:plm':
##
## has.intercept
## The following objects are masked from 'package:faraway':
##
## hormone, logit, pneumo, prplot
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:lmtest':
##
## lrtest
###BINOMIAL EXAMPLE###
#muscatine<-read.table(file.choose(), header=TRUE, sep="")
muscatine.0<-read.table("muscatine.txt",header=TRUE,sep="")
#turn obesity into a numeric variable ("." marks missing in the raw file)
muscatine.0$obese[muscatine.0$obese=="."]<-NA
muscatine.0$obese<-as.numeric(muscatine.0$obese==1)
muscatine<-na.omit(muscatine.0)
#sort the data! (geeglm requires rows ordered by cluster, then wave)
muscatine<-with(muscatine,muscatine[order(id,cAge),])
#Longer Model
#logit GEE: quadratic in age (centered at 12) interacted with gender,
#exchangeable working correlation, scale fixed at 1
long.mod<-geepack::geeglm(obese~gender+I(cAge-12)+I((cAge-12)^2)+
gender:I(cAge-12)+gender:I((cAge-12)^2),
id=id, waves=muscatine$occ,
family=binomial(link="logit"),
data=muscatine, scale.fix=TRUE,
corstr="exchangeable")
summary(long.mod)
##
## Call:
## geepack::geeglm(formula = obese ~ gender + I(cAge - 12) + I((cAge -
## 12)^2) + gender:I(cAge - 12) + gender:I((cAge - 12)^2), family = binomial(link = "logit"),
## data = muscatine, id = id, waves = muscatine$occ, corstr = "exchangeable",
## scale.fix = TRUE)
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.211788 0.050551 574.645 < 2e-16 ***
## gender 0.117138 0.071103 2.714 0.0995 .
## I(cAge - 12) 0.038026 0.013351 8.111 0.0044 **
## I((cAge - 12)^2) -0.017829 0.003391 27.652 0.000000145 ***
## gender:I(cAge - 12) 0.006873 0.018270 0.142 0.7068
## gender:I((cAge - 12)^2) 0.004137 0.004631 0.798 0.3716
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale is fixed.
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.539 0.01564
## Number of clusters: 4856 Maximum cluster size: 3
#Normally scale.fix=FALSE
#'"independence"', '"exchangeable"', '"ar1"', '"unstructured"' and '"userdefined"'
#Could only get AR(1) and independence to work.
#shorter model without interactions
#(drops both gender-by-age interaction terms from long.mod)
short.mod<-update(long.mod,.~.-gender:I(cAge-12)-gender:I((cAge-12)^2))
summary(short.mod)
##
## Call:
## geepack::geeglm(formula = obese ~ gender + I(cAge - 12) + I((cAge -
## 12)^2), family = binomial(link = "logit"), data = muscatine,
## id = id, waves = muscatine$occ, corstr = "exchangeable",
## scale.fix = TRUE)
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.22695 0.04770 661.6 < 2e-16 ***
## gender 0.14705 0.06271 5.5 0.019 *
## I(cAge - 12) 0.04167 0.00910 20.9 0.0000047164240 ***
## I((cAge - 12)^2) -0.01570 0.00231 46.4 0.0000000000099 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale is fixed.
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.539 0.0156
## Number of clusters: 4856 Maximum cluster size: 3
#Wald test for whether trajectories differ
#(anova on geeglm fits gives a Wald chi-square on the dropped terms)
anova(long.mod,short.mod)
## Analysis of 'Wald statistic' Table
##
## Model 1 obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + gender:I(cAge - 12) + gender:I((cAge - 12)^2)
## Model 2 obese ~ gender + I(cAge - 12) + I((cAge - 12)^2)
## Df X2 P(>|Chi|)
## 1 2 0.952 0.62
#short model with AR(1) instead
#same mean model as short.mod but with an AR(1) working correlation
short.mod.2<-geeglm(obese~gender+I(cAge-12)+I((cAge-12)^2),
id=id, waves=muscatine$occ,
family=binomial(link="logit"),
data=muscatine, scale.fix=TRUE,
corstr="ar1")
summary(short.mod.2)
##
## Call:
## geeglm(formula = obese ~ gender + I(cAge - 12) + I((cAge - 12)^2),
## family = binomial(link = "logit"), data = muscatine, id = id,
## waves = muscatine$occ, corstr = "ar1", scale.fix = TRUE)
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.21953 0.04784 649.76 < 2e-16 ***
## gender 0.13147 0.06287 4.37 0.037 *
## I(cAge - 12) 0.04108 0.00923 19.80 0.0000085928760 ***
## I((cAge - 12)^2) -0.01617 0.00235 47.41 0.0000000000058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale is fixed.
##
## Correlation: Structure = ar1 Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.614 0.0145
## Number of clusters: 4856 Maximum cluster size: 3
#compare models with fit statistics
#QIC (quasi-likelihood information criterion) is the GEE analogue of AIC
model.sel(long.mod,short.mod,short.mod.2,rank=QIC)
## Model selection table
## (Int) (cAg-12)^2 cAg-12 gnd gnd:I((cAg-12)^2) gnd:I(cAg-12)
## short.mod -1.23 -0.0157 0.0417 0.147
## short.mod.2 -1.22 -0.0162 0.0411 0.132
## long.mod -1.21 -0.0178 0.0380 0.117 0.00414 0.00687
## corstr qLik QIC delta weight
## short.mod exchng -5094 10196 0.00 0.466
## short.mod.2 ar1 -5094 10196 0.15 0.433
## long.mod exchng -5094 10199 3.07 0.101
## Abbreviations:
## corstr: exchng = 'exchangeable'
## Models ranked by QIC(x)
###COUNT EXAMPLE###
#clean up
rm(list=ls())
#data
#leprosy<-read.table(file.choose(), header=TRUE, sep="")
leprosy <- read.table("leprosy.txt", header=TRUE, sep="")
#one row per patient, so the row number serves as the cluster id
leprosy$id <- seq_len(nrow(leprosy))
#make the placebo arm ("C") the reference category
leprosy$drug <- relevel(leprosy$drug, ref="C")
#indicator for receiving either antibiotic (drug A or B)
leprosy$antibiotic <- as.numeric(leprosy$drug != "C")
#stack the pre/post counts into long format
m.leprosy <- melt.data.frame(data=leprosy,
measure.vars=c("pre","post"),
id=c("id","drug","antibiotic"))
#time: 0 = pre-treatment, 1 = post-treatment
m.leprosy$time <- as.numeric(m.leprosy$variable == "post")
#post-treatment indicators for each drug and for any antibiotic
m.leprosy$a <- as.numeric(m.leprosy$drug == "A" & m.leprosy$time == 1)
m.leprosy$b <- as.numeric(m.leprosy$drug == "B" & m.leprosy$time == 1)
m.leprosy$treat <- as.numeric(m.leprosy$antibiotic == 1 & m.leprosy$time == 1)
#order rows by cluster, then wave (required downstream by geeglm)
m.leprosy <- m.leprosy[order(m.leprosy$id, m.leprosy$time), ]
#Three Treatment Model
#Poisson GEE with a separate post-treatment effect for drug A and drug B;
#the time term captures the placebo group's pre/post change
mod.3<-geeglm(value~time+a+b, id=id,
waves=m.leprosy$time,
family=poisson(link="log"),
data=m.leprosy,corstr="exchangeable")
summary(mod.3)
##
## Call:
## geeglm(formula = value ~ time + a + b, family = poisson(link = "log"),
## data = m.leprosy, id = id, waves = m.leprosy$time, corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 2.37335 0.08014 877.10 <2e-16 ***
## time -0.00288 0.15701 0.00 0.985
## a -0.56257 0.22198 6.42 0.011 *
## b -0.49528 0.23420 4.47 0.034 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Estimated Scale Parameters:
## Estimate Std.err
## (Intercept) 3.21 0.5
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.738 0.0815
## Number of clusters: 30 Maximum cluster size: 2
#Wald test for whether treated patients differed from placebo patients overall
#(drop both drug indicators; compared against the full model with anova below)
mod.3.alt<-update(mod.3,.~.-a-b); summary(mod.3.alt)
##
## Call:
## geeglm(formula = value ~ time, family = poisson(link = "log"),
## data = m.leprosy, id = id, waves = m.leprosy$time, corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 2.3734 0.0801 877.10 <2e-16 ***
## time -0.3065 0.1010 9.21 0.0024 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Estimated Scale Parameters:
## Estimate Std.err
## (Intercept) 3.75 0.625
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.705 0.094
## Number of clusters: 30 Maximum cluster size: 2
#Wald chi-square test: joint significance of the dropped a and b terms
anova(mod.3,mod.3.alt)
## Analysis of 'Wald statistic' Table
##
## Model 1 value ~ time + a + b
## Model 2 value ~ time
## Df X2 P(>|Chi|)
## 1 2 7.34 0.025 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Two Treatment Model
#pools drugs A and B into the single post-treatment indicator "treat"
mod.2<-geeglm(value~time+treat, id=id,
waves=m.leprosy$time,
family=poisson(link="log"),
data=m.leprosy,corstr="exchangeable")
summary(mod.2)
##
## Call:
## geeglm(formula = value ~ time + treat, family = poisson(link = "log"),
## data = m.leprosy, id = id, waves = m.leprosy$time, corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 2.37335 0.08014 877.10 <2e-16 ***
## time -0.00286 0.15700 0.00 0.9855
## treat -0.52783 0.19883 7.05 0.0079 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Estimated Scale Parameters:
## Estimate Std.err
## (Intercept) 3.23 0.52
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.738 0.081
## Number of clusters: 30 Maximum cluster size: 2
#compare models with fit statistics
#ranked by QIC; lower is better
model.sel(mod.2,mod.3,rank=QIC)
## Model selection table
## (Intrc) time treat a b qLik QIC delta weight
## mod.2 2.37 -0.00286 -0.528 711 -1419 0.0 0.55
## mod.3 2.37 -0.00288 -0.563 -0.495 711 -1419 0.4 0.45
## Models ranked by QIC(x)
###ORDINAL AND NOMINAL USING "multgee"###
rm(list=ls())
#ORDINAL#
data(arthritis)
#Cumulative-logit GEE for the ordinal arthritis response.
#ordered(-y) reverses the response's category ordering; local odds ratios
#use the time-exchangeable ("time.exch") structure
alt.fitmod <- ordLORgee(ordered(-y)~sqrt(time)*factor(trt),
data=arthritis,id=id,
LORstr="time.exch",
repeated=time)
summary(alt.fitmod)
## GEE FOR ORDINAL MULTINOMIAL RESPONSES
## version 1.6.0 modified 2017-07-10
##
## Link : Cumulative logit
##
## Local Odds Ratios:
## Structure: time.exch
## Model: 3way
## Homogenous scores: TRUE
##
## call:
## ordLORgee(formula = ordered(-y) ~ sqrt(time) * factor(trt), data = arthritis,
## id = id, repeated = time, LORstr = "time.exch")
##
## Summary of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.411 -0.273 -0.132 0.000 -0.061 0.939
##
## Number of Iterations: 4
##
## Coefficients:
## Estimate san.se san.z Pr(>|san.z|)
## beta10 -2.8370 0.2570 -11.04 < 2e-16 ***
## beta20 -0.8938 0.2268 -3.94 0.00008 ***
## beta30 0.8552 0.2233 3.83 0.00013 ***
## beta40 2.8445 0.2838 10.02 < 2e-16 ***
## sqrt(time) 0.1086 0.1110 0.98 0.32794
## factor(trt)2 0.0441 0.2948 0.15 0.88107
## sqrt(time):factor(trt)2 0.2978 0.1606 1.85 0.06368 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Local Odds Ratios Estimates:
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 0.00 0.00 0.00 0.00 3.18 2.87 2.63 1.36 3.18 2.87 2.63 1.36
## [2,] 0.00 0.00 0.00 0.00 2.87 2.61 2.42 1.33 2.87 2.61 2.42 1.33
## [3,] 0.00 0.00 0.00 0.00 2.63 2.42 2.25 1.30 2.63 2.42 2.25 1.30
## [4,] 0.00 0.00 0.00 0.00 1.36 1.33 1.30 1.09 1.36 1.33 1.30 1.09
## [5,] 3.18 2.87 2.63 1.36 0.00 0.00 0.00 0.00 3.18 2.87 2.63 1.36
## [6,] 2.87 2.61 2.42 1.33 0.00 0.00 0.00 0.00 2.87 2.61 2.42 1.33
## [7,] 2.63 2.42 2.25 1.30 0.00 0.00 0.00 0.00 2.63 2.42 2.25 1.30
## [8,] 1.36 1.33 1.30 1.09 0.00 0.00 0.00 0.00 1.36 1.33 1.30 1.09
## [9,] 3.18 2.87 2.63 1.36 3.18 2.87 2.63 1.36 0.00 0.00 0.00 0.00
## [10,] 2.87 2.61 2.42 1.33 2.87 2.61 2.42 1.33 0.00 0.00 0.00 0.00
## [11,] 2.63 2.42 2.25 1.30 2.63 2.42 2.25 1.30 0.00 0.00 0.00 0.00
## [12,] 1.36 1.33 1.30 1.09 1.36 1.33 1.30 1.09 0.00 0.00 0.00 0.00
##
## pvalue of Null model: 0.000243
#NOMINAL#
#The largest group becomes the baseline. Our largest group is independent housing.
#y: 0=street living, 1=community living, 2=independent living
#Coefficient set 1 refers to street living against independent living.
#Coefficient set 2 refers to community living against independent living.
data(housing)
#Baseline-category-logit GEE; time enters as a factor interacted with sec,
#with time-exchangeable local odds ratios
house.fitmod <- nomLORgee(y~factor(time)*sec,
data=housing,id=id,
repeated=time,
LORstr="time.exch")
summary(house.fitmod)
## GEE FOR NOMINAL MULTINOMIAL RESPONSES
## version 1.6.0 modified 2017-07-10
##
## Link : Baseline Category Logit
##
## Local Odds Ratios:
## Structure: time.exch
## Model: 3way
## Homogenous scores: TRUE
##
## call:
## nomLORgee(formula = y ~ factor(time) * sec, data = housing, id = id,
## repeated = time, LORstr = "time.exch")
##
## Summary of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.586 -0.278 -0.129 -0.002 0.421 0.909
##
## Number of Iterations: 3
##
## Coefficients:
## Estimate san.se san.z Pr(>|san.z|)
## beta10 1.6607 0.2503 6.64 < 2e-16 ***
## factor(time)6:1 -1.8701 0.3188 -5.87 < 2e-16 ***
## factor(time)12:1 -2.9251 0.3683 -7.94 < 2e-16 ***
## factor(time)24:1 -2.8136 0.3426 -8.21 < 2e-16 ***
## sec:1 -0.5368 0.3370 -1.59 0.1112
## factor(time)6:sec:1 -1.1822 0.4604 -2.57 0.0102 *
## factor(time)12:sec:1 0.0792 0.4831 0.16 0.8698
## factor(time)24:sec:1 0.0327 0.4656 0.07 0.9440
## beta20 1.1664 0.2627 4.44 0.00001 ***
## factor(time)6:2 -0.2545 0.3008 -0.85 0.3974
## factor(time)12:2 -0.5705 0.3118 -1.83 0.0673 .
## factor(time)24:2 -1.0410 0.3072 -3.39 0.0007 ***
## sec:2 -0.1070 0.3476 -0.31 0.7581
## factor(time)6:sec:2 -1.6234 0.4135 -3.93 0.00009 ***
## factor(time)12:sec:2 -2.0485 0.4454 -4.60 < 2e-16 ***
## factor(time)24:sec:2 -1.0496 0.4183 -2.51 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Local Odds Ratios Estimates:
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 0.00 0.00 1.36 1.88 1.36 1.88 1.36 1.88
## [2,] 0.00 0.00 1.88 3.69 1.88 3.69 1.88 3.69
## [3,] 1.36 1.88 0.00 0.00 1.36 1.88 1.36 1.88
## [4,] 1.88 3.69 0.00 0.00 1.88 3.69 1.88 3.69
## [5,] 1.36 1.88 1.36 1.88 0.00 0.00 1.36 1.88
## [6,] 1.88 3.69 1.88 3.69 0.00 0.00 1.88 3.69
## [7,] 1.36 1.88 1.36 1.88 1.36 1.88 0.00 0.00
## [8,] 1.88 3.69 1.88 3.69 1.88 3.69 0.00 0.00
##
## pvalue of Null model: <0.0001
###########################################################################
#clean up (wipes the workspace -- this script's convention between sections)
rm(list=ls())
#Load libraries
library(lme4)
library(reshape)
library(car)
###BINOMIAL EXAMPLE: OBESITY###
#muscatine<-read.table(file.choose(), header=TRUE, sep="")
muscatine.0<-read.table("muscatine.txt",header=TRUE,sep="")
#turn obesity into a numeric variable ("." marks missing in the raw file)
muscatine.0$obese[muscatine.0$obese=="."]<-NA
muscatine.0$obese<-as.numeric(muscatine.0$obese==1)
muscatine<-na.omit(muscatine.0)
#sort the data!
muscatine<-with(muscatine,muscatine[order(id,cAge),])
#Longer Model
#random-intercept logit GLMM: quadratic in age (centered at 12)
#interacted with gender, plus a per-child intercept (1|id)
long.mod<-glmer(obese~gender+I(cAge-12)+I((cAge-12)^2)+
gender:I(cAge-12)+gender:I((cAge-12)^2)+(1|id),
family=binomial(link="logit"), data=muscatine)
summary(long.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + gender:I(cAge -
## 12) + gender:I((cAge - 12)^2) + (1 | id)
## Data: muscatine
##
## AIC BIC logLik deviance df.resid
## 7926 7977 -3956 7912 9849
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7362 -0.0203 -0.0156 -0.0088 2.9493
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 143 11.9
## Number of obs: 9856, groups: id, 4856
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.7170 0.2088 -36.96 < 2e-16 ***
## gender 0.0457 0.2202 0.21 0.83558
## I(cAge - 12) 0.1416 0.0426 3.32 0.00089 ***
## I((cAge - 12)^2) -0.0673 0.0105 -6.44 0.00000000012 ***
## gender:I(cAge - 12) 0.0408 0.0593 0.69 0.49124
## gender:I((cAge - 12)^2) 0.0184 0.0143 1.28 0.19884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) gender I(A-12 I((A-1 g:I(A-1
## gender -0.506
## I(cAge-12) -0.100 0.040
## I((A-12)^2) -0.176 0.247 -0.112
## gnd:I(A-12) 0.019 -0.076 -0.712 0.071
## g:I((A-12)^ 0.170 -0.360 0.076 -0.721 -0.130
#shorter model without interactions
#(drops both gender-by-age interaction terms from long.mod)
short.mod<-update(long.mod,.~.-gender:I(cAge-12)-gender:I((cAge-12)^2))
summary(short.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + (1 | id)
## Data: muscatine
##
## AIC BIC logLik deviance df.resid
## 7925 7961 -3957 7915 9851
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.580 -0.020 -0.016 -0.009 3.173
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 142 11.9
## Number of obs: 9856, groups: id, 4856
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.77506 0.20609 -37.73 < 2e-16 ***
## gender 0.17181 0.20326 0.85 0.4
## I(cAge - 12) 0.16287 0.02989 5.45 0.0000000504423559 ***
## I((cAge - 12)^2) -0.05772 0.00723 -7.98 0.0000000000000014 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) gender I(A-12
## gender -0.490
## I(cAge-12) -0.133 -0.008
## I((A-12)^2) -0.089 0.000 -0.151
#Likelihood ratio test for whether trajectories differ
#(chi-square LRT comparing the nested GLMMs)
anova(long.mod,short.mod)
## Data: muscatine
## Models:
## short.mod: obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + (1 | id)
## long.mod: obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + gender:I(cAge -
## long.mod: 12) + gender:I((cAge - 12)^2) + (1 | id)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## short.mod 5 7925 7961 -3957 7915
## long.mod 7 7926 7977 -3956 7912 2.4 2 0.3
###FIRST COUNT EXAMPLE: EPILEPSY SEIZURES###
#clean up
rm(list=ls())
#data
epilepsy<-read.table("epilepsy.txt", header=TRUE, sep="")
#reshape data: one row per patient-visit (t0..t4)
m.epilepsy<-melt.data.frame(data=epilepsy,
measure.vars=c("t0","t1","t2","t3","t4"),
id=c("id","treat","age"))
#create visit variable (0-4, parsed from the variable name)
m.epilepsy$visit<-as.numeric(substr(m.epilepsy$variable,2,2))
#create dummy for time (0 = baseline visit, 1 = any follow-up)
m.epilepsy$dummy<-1-as.numeric(m.epilepsy$visit==0)
#create weeks variable (visit number times 2)
m.epilepsy$weeks<-2*as.numeric(substr(m.epilepsy$variable,2,2))
#offset: log of the observation window -- log(8) at baseline, log(2) at
#follow-ups (presumably an 8-week baseline vs 2-week follow-up counting
#periods; confirm against the data codebook)
m.epilepsy$logT[m.epilepsy$weeks==0]<-log(8)
m.epilepsy$logT[m.epilepsy$weeks!=0]<-log(2)
#Book Model
#Poisson GLMM with random intercept and random slope on the post-baseline
#dummy; the offset adjusts for the unequal observation lengths
book.mod<-glmer(value~(dummy|id)+treat+dummy*treat,
offset=logT, family=poisson(link="log"),
data=m.epilepsy)
summary(book.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: value ~ (dummy | id) + treat + dummy * treat
## Data: m.epilepsy
## Offset: logT
##
## AIC BIC logLik deviance df.resid
## 1864 1890 -925 1850 288
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.139 -0.707 -0.062 0.514 6.965
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.500 0.707
## dummy 0.232 0.482 0.16
## Number of obs: 295, groups: id, 59
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0708 0.1403 7.63 0.000000000000023 ***
## treat 0.0512 0.1927 0.27 0.790
## dummy -0.0005 0.1091 0.00 0.996
## treat:dummy -0.3062 0.1504 -2.04 0.042 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) treat dummy
## treat -0.725
## dummy 0.011 -0.013
## treat:dummy -0.014 0.025 -0.709
#Outlier Deleted Model
#refit excluding patient id 49 via the subset argument
no.outlier<-glmer(value~(dummy|id)+treat+dummy*treat,
offset=logT, family=poisson(link="log"),
subset=id!=49, data=m.epilepsy)
summary(no.outlier)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: value ~ (dummy | id) + treat + dummy * treat
## Data: m.epilepsy
## Offset: logT
## Subset: id != 49
##
## AIC BIC logLik deviance df.resid
## 1802 1827 -894 1788 283
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.125 -0.671 -0.057 0.514 6.996
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.452 0.672
## dummy 0.215 0.464 0.05
## Number of obs: 290, groups: id, 58
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.06939 0.13407 7.98 0.0000000000000015 ***
## treat -0.00801 0.18565 -0.04 0.966
## dummy 0.00777 0.10640 0.07 0.942
## treat:dummy -0.34588 0.14800 -2.34 0.019 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) treat dummy
## treat -0.719
## dummy -0.088 0.057
## treat:dummy 0.056 -0.073 -0.700
#Weeks Model
#linear-in-weeks alternative: random slope on weeks instead of the dummy
weeks.mod<-glmer(value~(weeks|id)+treat+weeks*treat,
offset=logT, family=poisson(link="log"),
data=m.epilepsy)
summary(weeks.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: value ~ (weeks | id) + treat + weeks * treat
## Data: m.epilepsy
## Offset: logT
##
## AIC BIC logLik deviance df.resid
## 1924 1950 -955 1910 288
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.379 -0.723 -0.117 0.585 6.631
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.52686 0.7259
## weeks 0.00503 0.0709 0.22
## Number of obs: 295, groups: id, 59
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1039 0.1426 7.74 0.0000000000000098 ***
## treat 0.0175 0.1963 0.09 0.929
## weeks -0.0113 0.0168 -0.67 0.500
## treat:weeks -0.0467 0.0234 -2.00 0.046 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) treat weeks
## treat -0.724
## weeks 0.065 -0.053
## treat:weeks -0.054 0.074 -0.694
#Plot expected counts for treated and untreated IF random effects are zero!
#Use the fitted fixed effects directly rather than hand-copied constants:
#the original hard-coded 0.1748 for the treat coefficient, but the model's
#printed estimate is 0.0175 (see summary above) -- a transcription error.
b<-fixef(weeks.mod)
week.index<-c(0:8)
#expected count = exp(intercept + treat + (weeks + treat:weeks)*t) for treated
e.treated<-exp(b["(Intercept)"]+b["treat"]+(b["weeks"]+b["treat:weeks"])*week.index)
#and exp(intercept + weeks*t) for untreated
e.untreated<-exp(b["(Intercept)"]+b["weeks"]*week.index)
plot(y=e.treated,x=week.index,type='l',ylim=c(2,5))
lines(y=e.untreated,x=week.index,lty=2)
###SECOND COUNT EXAMPLE: REVISITING LEPROSY DATA###
#clean up
rm(list=ls())
#data
#leprosy<-read.table(file.choose(), header=TRUE, sep="")
leprosy <- read.table("leprosy.txt", header=TRUE, sep="")
#one row per patient, so the row number serves as the cluster id
leprosy$id <- seq_len(nrow(leprosy))
#make the placebo arm ("C") the reference category
leprosy$drug <- relevel(leprosy$drug, ref="C")
#indicator for receiving either antibiotic (drug A or B)
leprosy$antibiotic <- as.numeric(leprosy$drug != "C")
#stack the pre/post counts into long format
m.leprosy <- melt.data.frame(data=leprosy,
measure.vars=c("pre","post"),
id=c("id","drug","antibiotic"))
#time: 0 = pre-treatment, 1 = post-treatment
m.leprosy$time <- as.numeric(m.leprosy$variable == "post")
#post-treatment indicators for each drug and for any antibiotic
m.leprosy$a <- as.numeric(m.leprosy$drug == "A" & m.leprosy$time == 1)
m.leprosy$b <- as.numeric(m.leprosy$drug == "B" & m.leprosy$time == 1)
m.leprosy$treat <- as.numeric(m.leprosy$antibiotic == 1 & m.leprosy$time == 1)
#order rows by cluster, then wave
m.leprosy <- m.leprosy[order(m.leprosy$id, m.leprosy$time), ]
#Three Treatment Model
#random-intercept Poisson GLMM with separate post-treatment effects
#for drug A and drug B
mod.3<-glmer(value~time+a+b+(1|id),
family=poisson(link="log"),
data=m.leprosy)
summary(mod.3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: value ~ time + a + b + (1 | id)
## Data: m.leprosy
##
## AIC BIC logLik deviance df.resid
## 362 373 -176 352 55
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8351 -0.6176 0.0249 0.5554 1.9403
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.28 0.529
## Number of obs: 60, groups: id, 30
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.24153 0.11411 19.64 <2e-16 ***
## time 0.00331 0.12260 0.03 0.9785
## a -0.60587 0.20179 -3.00 0.0027 **
## b -0.52311 0.19464 -2.69 0.0072 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time a
## time -0.187
## a -0.035 -0.583
## b -0.027 -0.604 0.373
#Likelihood ratio test for whether treated patients
#differed from placebo patients overall
#(drop both drug indicators; nested models compared with anova below)
mod.3.alt<-update(mod.3,.~.-a-b); summary(mod.3.alt)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: value ~ time + (1 | id)
## Data: m.leprosy
##
## AIC BIC logLik deviance df.resid
## 370 376 -182 364 57
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0118 -0.6351 0.0725 0.4371 2.0274
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.332 0.577
## Number of obs: 60, groups: id, 30
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.2186 0.1218 18.21 <2e-16 ***
## time -0.3065 0.0848 -3.61 0.0003 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## time -0.295
#likelihood ratio test of the two dropped drug indicators
anova(mod.3,mod.3.alt)
## Data: m.leprosy
## Models:
## mod.3.alt: value ~ time + (1 | id)
## mod.3: value ~ time + a + b + (1 | id)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod.3.alt 3 370 376 -182 364
## mod.3 5 362 373 -176 352 11.6 2 0.003 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###########################################################
#clean up (wipes the workspace -- this script's convention between sections)
rm(list=ls())
#load libraries
library(lme4)
library(reshape)
library(mice)
#load data
amenorrhea<-read.table("amenorrhea.txt", header=TRUE, sep="")
#turn status into a numeric variable ("." marks missing in the raw file)
amenorrhea$status[amenorrhea$status=="."]<-NA
amenorrhea$status<-as.numeric(amenorrhea$status==1)
#start time at 0 (necessary for convergence)
amenorrhea$time<-amenorrhea$time-1
#Create Experimental Time Terms (dose-by-time interactions)
amenorrhea$dt<-amenorrhea$time*amenorrhea$dose
#I() is only meaningful inside a formula; plain arithmetic gives the same
#values without tagging the column with the AsIs class
amenorrhea$dt2<-(amenorrhea$time^2)*amenorrhea$dose
#Book Model from Chapter 14 (results differ slightly with the time rescale, but they converge)
book.amenorrhea<-glmer(status~(1|id)+time+I(time^2)+dt+dt2,
family=binomial(link="logit"),
data=amenorrhea)
summary(book.amenorrhea)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: status ~ (1 | id) + time + I(time^2) + dt + dt2
## Data: amenorrhea
##
## AIC BIC logLik deviance df.resid
## 3925 3962 -1957 3913 3610
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.775 -0.469 -0.233 0.468 4.296
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 4.35 2.09
## Number of obs: 3616, groups: id, 1151
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4604 0.1397 -17.61 < 2e-16 ***
## time 0.7561 0.1984 3.81 0.00014 ***
## I(time^2) 0.0340 0.0655 0.52 0.60385
## dt 0.8861 0.2513 3.53 0.00042 ***
## dt2 -0.2579 0.0879 -2.93 0.00335 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time I(t^2) dt
## time -0.417
## I(time^2) 0.238 -0.945
## dt -0.073 -0.618 0.646
## dt2 0.057 0.598 -0.678 -0.956
#reshape long to wide: one row per subject, status.0-status.3 as columns
wide<-reshape(amenorrhea, idvar=c("id", "dose"),
timevar="time", direction="wide")
#multiple random imputation of the missing outcomes using predictive mean
#matching, as in the book
m<-5 #number of imputations, definitely worth increasing!
w.imp<-mice(wide, m=m, defaultMethod="pmm")
##
## iter imp variable
## 1 1 status.1 status.2 status.3
## 1 2 status.1 status.2 status.3
## 1 3 status.1 status.2 status.3
## 1 4 status.1 status.2 status.3
## 1 5 status.1 status.2 status.3
## 2 1 status.1 status.2 status.3
## 2 2 status.1 status.2 status.3
## 2 3 status.1 status.2 status.3
## 2 4 status.1 status.2 status.3
## 2 5 status.1 status.2 status.3
## 3 1 status.1 status.2 status.3
## 3 2 status.1 status.2 status.3
## 3 3 status.1 status.2 status.3
## 3 4 status.1 status.2 status.3
## 3 5 status.1 status.2 status.3
## 4 1 status.1 status.2 status.3
## 4 2 status.1 status.2 status.3
## 4 3 status.1 status.2 status.3
## 4 4 status.1 status.2 status.3
## 4 5 status.1 status.2 status.3
## 5 1 status.1 status.2 status.3
## 5 2 status.1 status.2 status.3
## 5 3 status.1 status.2 status.3
## 5 4 status.1 status.2 status.3
## 5 5 status.1 status.2 status.3
## Warning: Number of logged events: 8
#reshape the m imputed data sets back to long form
#BUG FIX: the original used list(NA,m), which creates a 2-element list
#containing NA and the number m; vector("list", m) preallocates m slots
long.data<-vector("list",m)
for(i in 1:m)
{
  #complete() extracts the i-th filled-in data set from the mice object
  long.data[[i]]<-melt.data.frame(data=complete(w.imp,i),
                                  measure.vars=c("status.0","status.1",
                                                 "status.2","status.3"),
                                  id=c("id","dose"))
  #recover the numeric time index from the "status.t" variable name
  long.data[[i]]$time<-as.numeric(substr(long.data[[i]]$variable,8,8))
  #rebuild the dose-by-time interaction terms
  long.data[[i]]$dt<-long.data[[i]]$time*long.data[[i]]$dose
  long.data[[i]]$dt2<-I(long.data[[i]]$time^2)*long.data[[i]]$dose
}
#Now: What does our model look like with imputed data?
#Note: "status" is now called "value" after melting.
#BUG FIX: list(NA,m) -> vector("list", m) (proper preallocation); also the
#bare summary() inside the loop was a silent no-op (its value was discarded),
#so it is now printed explicitly to match the comment's intent
imputed.models<-vector("list",m)
for(i in 1:m)
{
  imputed.models[[i]]<-glmer(value~(1|id)+time+I(time^2)+dt+dt2,
                             family=binomial(link="logit"),
                             data=long.data[[i]])
  print(summary(imputed.models[[i]]))
}
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00118607
## (tol = 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00121979
## (tol = 0.001, component 1)
#Pool the m sets of estimates using Rubin's (1987) rules
#Average coefficients: one row of fixed effects per imputation
coeffs<-NULL
for(i in 1:m)
{
  coeffs<-rbind(coeffs,fixef(imputed.models[[i]]))
}
avg.coef<-apply(coeffs,2,mean)
#Between-imputation variance of the coefficient estimates (B)
between<-apply(coeffs,2,var)
#Within-imputation variance (W): average of the squared standard errors
errVars<-NULL
for(i in 1:m)
{
  errVars<-rbind(errVars,diag(vcov(imputed.models[[i]])))
}
within<-apply(errVars,2,mean)
#Total variance T = W + (1 + 1/m)B; standard errors of pooled estimates
final.se <- sqrt(within + ((m+1)/m)*between)
#t- or z-ratios
test.stats<-avg.coef/final.se
#degrees of freedom, Rubin (1987): nu = (m-1)*(1 + W/((1+1/m)B))^2
#BUG FIX: the original multiplied within/between by 1/(m+1); the correct
#factor is m/(m+1), i.e. W divided by (1+1/m)B
deg.free <- (m-1)*(1+(m/(m+1))*within/between)^2
#p-values for z-test
p.values.z<-2*(1-pnorm(abs(test.stats)))
#p-values for t-test (uses the pooled degrees of freedom)
p.values.t<-2*(1-pt(abs(test.stats), df=deg.free))
#All results from imputation
#(printed values below are from one random imputation run; mice is
#stochastic, so exact numbers will vary unless a seed is set)
avg.coef
## (Intercept) time I(time^2) dt dt2
## -2.0731 0.3822 -0.0238 0.9690 -0.3163
final.se
## (Intercept) time I(time^2) dt dt2
## 0.1685 0.2214 0.0726 0.4436 0.1499
test.stats
## (Intercept) time I(time^2) dt dt2
## -12.303 1.727 -0.327 2.184 -2.110
p.values.t
## (Intercept) time I(time^2) dt dt2
## 0.0000447 0.1324795 0.7542532 0.0878135 0.0952300
#make a LaTeX table of the pooled results
library(xtable)
xtable(cbind(avg.coef,final.se,test.stats,p.values.t),digits=4)
## % latex table generated in R 3.5.1 by xtable 1.8-3 package
## % Fri Jan 4 21:10:03 2019
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & avg.coef & final.se & test.stats & p.values.t \\
## \hline
## (Intercept) & -2.0731 & 0.1685 & -12.3030 & 0.0000 \\
## time & 0.3822 & 0.2214 & 1.7267 & 0.1325 \\
## I(time\verb|^|2) & -0.0238 & 0.0726 & -0.3270 & 0.7543 \\
## dt & 0.9690 & 0.4436 & 2.1842 & 0.0878 \\
## dt2 & -0.3163 & 0.1499 & -2.1104 & 0.0952 \\
## \hline
## \end{tabular}
## \end{table}
#Compare to ignoring missing
summary(book.amenorrhea)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: status ~ (1 | id) + time + I(time^2) + dt + dt2
## Data: amenorrhea
##
## AIC BIC logLik deviance df.resid
## 3925 3962 -1957 3913 3610
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.775 -0.469 -0.233 0.468 4.296
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 4.35 2.09
## Number of obs: 3616, groups: id, 1151
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4604 0.1397 -17.61 < 2e-16 ***
## time 0.7561 0.1984 3.81 0.00014 ***
## I(time^2) 0.0340 0.0655 0.52 0.60385
## dt 0.8861 0.2513 3.53 0.00042 ***
## dt2 -0.2579 0.0879 -2.93 0.00335 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time I(t^2) dt
## time -0.417
## I(time^2) 0.238 -0.945
## dt -0.073 -0.618 0.646
## dt2 0.057 0.598 -0.678 -0.956
###########################################################
#EVENT HISTORY (SURVIVAL) MODELS OF CABINET DURATION
#clean up
rm(list=ls())
#packages
library(foreign) #Stata data
library(survival) #Sufficient for Cox model
##
## Attaching package: 'survival'
## The following objects are masked from 'package:faraway':
##
## rats, solder
library(eha) #Parametric models and another version of Cox
##
## Attaching package: 'eha'
## The following objects are masked from 'package:VGAM':
##
## dgompertz, dmakeham, pgompertz, pmakeham, qgompertz, qmakeham,
## rgompertz, rmakeham
#load cabinet duration data from Brad Jones's website
#http://psfaculty.ucdavis.edu/bsjjones/cabinet.dta
cab<-read.dta("cabinet.dta")
###WEIBULL MODEL###
#Weibull Regression: Proportional hazards model with baseline hazard(s)
#from the Weibull family of distributions. Allows for stratification
#with different scale and shape in each stratum, and left truncated
#and right censored data.
#results are consistent with p.61, except for the constant
#NOTE(review): newer eha releases deprecate weibreg() in favor of
#phreg(..., dist="weibull") -- confirm against the installed version
weib.cabinet<-eha::weibreg(Surv(time=durat, event=censor)~
invest+polar+numst+format+postelec+caretakr,
data=cab)
summary(weib.cabinet)
## Call:
## eha::weibreg(formula = Surv(time = durat, event = censor) ~ invest +
## polar + numst + format + postelec + caretakr, data = cab)
##
## Covariate Mean Coef Exp(Coef) se(Coef) Wald p
## invest 0.332 0.383 1.466 0.137 0.005
## polar 10.521 0.023 1.023 0.006 0.000
## numst 0.713 -0.601 0.548 0.131 0.000
## format 1.690 0.132 1.142 0.044 0.002
## postelec 0.665 -0.879 0.415 0.138 0.000
## caretakr 0.009 1.726 5.618 0.276 0.000
##
## log(scale) 2.985 19.795 0.128 0.000
## log(shape) 0.258 1.294 0.050 0.000
##
## Events 271
## Total time at risk 5789.5
## Max. log. likelihood -1014.6
## LR test statistic 172
## Degrees of freedom 6
## Overall p-value 0
###LOG-LOGISTIC MODEL###
#Parametric Proportional Hazards Regression
#Proportional hazards model with parametric baseline hazard(s).
#Allows for stratification with different scale and shape
#in each stratum, and left truncated and right censored data.
#Same covariates as the Weibull fit; only the baseline family changes.
log.logis.cabinet<-eha::phreg(Surv(time=durat, event=censor)~
invest+polar+numst+format+postelec+caretakr,
data=cab, dist="loglogistic")
summary(log.logis.cabinet)
## Call:
## eha::phreg(formula = Surv(time = durat, event = censor) ~ invest +
## polar + numst + format + postelec + caretakr, data = cab,
## dist = "loglogistic")
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## (Intercept) 2.939 2.816 0.297
## invest 0.332 0.382 1.466 0.137 0.005
## polar 10.521 0.023 1.023 0.006 0.000
## numst 0.713 -0.595 0.552 0.132 0.000
## format 1.690 0.132 1.141 0.044 0.003
## postelec 0.665 -0.869 0.419 0.140 0.000
## caretakr 0.009 1.741 5.704 0.279 0.000
##
## log(scale) 5.187 2.323 0.026
## log(shape) 0.278 0.074 0.000
##
## Events 271
## Total time at risk 5789.5
## Max. log. likelihood -1014.6
## LR test statistic 165.97
## Degrees of freedom 6
## Overall p-value 0
###COX PROPORTIONAL HAZARDS MODEL###
#Cox proportional hazards regression model.
#Time dependent variables, time dependent strata,
#multiple events per subject, and other extensions
#are incorporated using the counting process formulation
#of Andersen and Gill.
#Efron approximation for tied event times is the default, see p. 60
#Note the two possible interpretations
cox.cabinet<-survival::coxph(Surv(durat, censor)~
invest+polar+numst+format+postelec+caretakr,
data=cab)
summary(cox.cabinet)
## Call:
## survival::coxph(formula = Surv(durat, censor) ~ invest + polar +
## numst + format + postelec + caretakr, data = cab)
##
## n= 314, number of events= 271
##
## coef exp(coef) se(coef) z Pr(>|z|)
## invest 0.38714 1.47276 0.13713 2.82 0.0048 **
## polar 0.02334 1.02361 0.00562 4.15 0.00003275722 ***
## numst -0.58262 0.55843 0.13223 -4.41 0.00001051805 ***
## format 0.13001 1.13884 0.04387 2.96 0.0030 **
## postelec -0.86112 0.42269 0.14062 -6.12 0.00000000091 ***
## caretakr 1.71040 5.53116 0.28282 6.05 0.00000000147 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## invest 1.473 0.679 1.126 1.927
## polar 1.024 0.977 1.012 1.035
## numst 0.558 1.791 0.431 0.724
## format 1.139 0.878 1.045 1.241
## postelec 0.423 2.366 0.321 0.557
## caretakr 5.531 0.181 3.177 9.628
##
## Concordance= 0.725 (se = 0.016 )
## Rsquare= 0.407 (max possible= 1 )
## Likelihood ratio test= 164 on 6 df, p=<2e-16
## Wald test = 176 on 6 df, p=<2e-16
## Score (logrank) test = 216 on 6 df, p=<2e-16
#Breslow approximation for tied event times, see p. 60
#Note the two possible interpretations
cox.cabinet.2<-coxph(Surv(durat, censor)~
invest+polar+numst+format+postelec+caretakr,
data=cab, method="breslow")
summary(cox.cabinet.2)
## Call:
## coxph(formula = Surv(durat, censor) ~ invest + polar + numst +
## format + postelec + caretakr, data = cab, method = "breslow")
##
## n= 314, number of events= 271
##
## coef exp(coef) se(coef) z Pr(>|z|)
## invest 0.37841 1.45996 0.13742 2.75 0.0059 **
## polar 0.02245 1.02270 0.00562 3.99 0.000065612 ***
## numst -0.56949 0.56581 0.13207 -4.31 0.000016166 ***
## format 0.12540 1.13360 0.04396 2.85 0.0043 **
## postelec -0.83272 0.43487 0.14044 -5.93 0.000000003 ***
## caretakr 1.54278 4.67757 0.28011 5.51 0.000000036 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## invest 1.460 0.685 1.115 1.911
## polar 1.023 0.978 1.011 1.034
## numst 0.566 1.767 0.437 0.733
## format 1.134 0.882 1.040 1.236
## postelec 0.435 2.300 0.330 0.573
## caretakr 4.678 0.214 2.701 8.099
##
## Concordance= 0.725 (se = 0.016 )
## Rsquare= 0.384 (max possible= 1 )
## Likelihood ratio test= 152 on 6 df, p=<2e-16
## Wald test = 162 on 6 df, p=<2e-16
## Score (logrank) test = 192 on 6 df, p=<2e-16
#Alternative estimator in "eha"
cox.cabinet.3<-eha::coxreg(Surv(durat, censor)~
invest+polar+numst+format+postelec+caretakr,
data=cab, method="breslow")
summary(cox.cabinet.3)
## Call:
## eha::coxreg(formula = Surv(durat, censor) ~ invest + polar +
## numst + format + postelec + caretakr, data = cab, method = "breslow")
##
## Covariate Mean Coef Rel.Risk S.E. Wald p
## invest 0.332 0.378 1.460 0.137 0.006
## polar 10.521 0.022 1.023 0.006 0.000
## numst 0.713 -0.569 0.566 0.132 0.000
## format 1.690 0.125 1.134 0.044 0.004
## postelec 0.665 -0.833 0.435 0.140 0.000
## caretakr 0.009 1.543 4.678 0.280 0.000
##
## Events 271
## Total time at risk 5789.5
## Max. log. likelihood -1299.9
## LR test statistic 152.37
## Degrees of freedom 6
## Overall p-value 0
#HW: load UN peacekeeping data from Brad Jones's website
#Run two event history models: one parametric and one Cox
#un<-read.dta("//spia.uga.edu/faculty_pages/monogan/teaching/pd/UNFINAL.dta")
###########################################################
###ADDITIONAL TOPICS FOR FINAL EVENT HISTORY CLASS###
###RESIDUAL ANALYSES###
#"survival" package works a bit better here.
#parametric survival regression model.
#These are location-scale models for an arbitrary transform
#of the time variable; the most common cases use a log
#transformation, leading to accelerated failure time models.
#Weibull Model, accelerated failure time form. Residuals in time scale for "response".
#Note the sign flip of the coefficients relative to the PH
#parameterization of eha::weibreg above.
weib.cabinet.b<-survival::survreg(Surv(time=durat, event=censor)~
invest+polar+numst+format+postelec+caretakr,
data=cab, dist='weibull')
summary(weib.cabinet.b)
##
## Call:
## survival::survreg(formula = Surv(time = durat, event = censor) ~
## invest + polar + numst + format + postelec + caretakr, data = cab,
## dist = "weibull")
## Value Std. Error z p
## (Intercept) 2.98543 0.12811 23.30 < 2e-16
## invest -0.29582 0.10590 -2.79 0.0052
## polar -0.01794 0.00428 -4.19 0.000027425315
## numst 0.46489 0.10058 4.62 0.000003800260
## format -0.10237 0.03359 -3.05 0.0023
## postelec 0.67961 0.10438 6.51 0.000000000075
## caretakr -1.33401 0.20175 -6.61 0.000000000038
## Log(scale) -0.25762 0.05006 -5.15 0.000000265343
##
## Scale= 0.773
##
## Weibull distribution
## Loglik(model)= -1015 Loglik(intercept only)= -1101
## Chisq= 172 on 6 degrees of freedom, p= 1.7e-34
## Number of Newton-Raphson Iterations: 5
## n= 314
#response-scale residuals vs. polarization, with a lowess smoother
#to eyeball nonlinearity
t.resid<-residuals(weib.cabinet.b,type="response")
plot(y=t.resid,x=cab$polar)
lines(lowess(x=cab$polar,y=t.resid),col='red')
#Log-Logistic Model, accelerated failure time form. Residuals in time
#scale for "response".
log.logis.cabinet.b<-survreg(Surv(time=durat, event=censor)~
invest+polar+numst+format+postelec+caretakr,
data=cab, dist='loglogistic')
summary(log.logis.cabinet.b)
##
## Call:
## survreg(formula = Surv(time = durat, event = censor) ~ invest +
## polar + numst + format + postelec + caretakr, data = cab,
## dist = "loglogistic")
## Value Std. Error z p
## (Intercept) 2.72882 0.15959 17.10 < 2e-16
## invest -0.33675 0.12781 -2.63 0.0084
## polar -0.02220 0.00526 -4.22 0.000024793
## numst 0.48307 0.12125 3.98 0.000067745
## format -0.10935 0.04197 -2.61 0.0092
## postelec 0.64088 0.12403 5.17 0.000000238
## caretakr -1.26921 0.23103 -5.49 0.000000039
## Log(scale) -0.56577 0.05114 -11.06 < 2e-16
##
## Scale= 0.568
##
## Log logistic distribution
## Loglik(model)= -1025 Loglik(intercept only)= -1099
## Chisq= 149 on 6 degrees of freedom, p= 1.4e-29
## Number of Newton-Raphson Iterations: 4
## n= 314
#log-logistic residuals vs. polarization with a lowess smoother
t2.resid<-residuals(log.logis.cabinet.b,type="response")
plot(y=t2.resid,x=cab$polar)
lines(lowess(x=cab$polar,y=t2.resid),col='red')
#Cox Model: martingale residuals vs. polarization
m.resid<-residuals(cox.cabinet,type="martingale")
plot(y=m.resid,x=cab$polar)
lines(lowess(x=cab$polar,y=m.resid),col='red')
###FRAILTY MODELS###
#"survival" package works a bit better here.
#create an ID variable so each cabinet gets its own frailty term
cab$ID<-row.names(cab)
#Log-Logistic AFT model with a Gaussian frailty per observation;
#frailty variance chosen by AIC
log.logis.cabinet.b.frail<-survreg(Surv(time=durat, event=censor)~
invest+polar+numst+format+postelec+caretakr+
frailty.gaussian(ID,method="aic"),
data=cab, dist='loglogistic')
#summary(log.logis.cabinet.b.frail)
#Cox Model with a gamma frailty per observation, EM estimation
#NOTE(review): frailty() terms in survival are superseded (coxme is the
#modern route) -- behavior may differ across survival versions; confirm
cox.cabinet.2.frail<-coxph(Surv(durat, censor)~
invest+polar+numst+format+postelec+caretakr+
frailty.gamma(ID,method="em"),
data=cab, method="breslow")
summary(cox.cabinet.2.frail)
## Call:
## coxph(formula = Surv(durat, censor) ~ invest + polar + numst +
## format + postelec + caretakr + frailty.gamma(ID, method = "em"),
## data = cab, method = "breslow")
##
## n= 314, number of events= 271
##
## coef se(coef) se2 Chisq DF p
## invest 0.3784 0.13742 0.13742 7.58 1 0.005900000
## polar 0.0224 0.00562 0.00562 15.93 1 0.000066000
## numst -0.5695 0.13207 0.13207 18.59 1 0.000016000
## format 0.1254 0.04396 0.04396 8.14 1 0.004300000
## postelec -0.8327 0.14044 0.14044 35.16 1 0.000000003
## caretakr 1.5428 0.28011 0.28011 30.34 1 0.000000036
## frailty.gamma(ID, method 0.00 0 0.900000000
##
## exp(coef) exp(-coef) lower .95 upper .95
## invest 1.460 0.685 1.115 1.911
## polar 1.023 0.978 1.011 1.034
## numst 0.566 1.767 0.437 0.733
## format 1.134 0.882 1.040 1.236
## postelec 0.435 2.300 0.330 0.573
## caretakr 4.678 0.214 2.701 8.099
##
## Iterations: 6 outer, 34 Newton-Raphson
## Variance of random effect= 0.0000005 I-likelihood = -1299.9
## Degrees of freedom for terms= 1 1 1 1 1 1 0
## Concordance= 0.727 (se = 0.727 )
## Likelihood ratio test= 152 on 6 df, p=<2e-16
###########################################################
#Discrete-Time Model and Conditional Logit Code
#clean up
rm(list=ls())
#load libraries
library(foreign)
library(lme4)
library(survival)
library(mgcv)
## This is mgcv 1.8-26. For overview type 'help("mgcv-package")'.
##
## Attaching package: 'mgcv'
## The following object is masked from 'package:VGAM':
##
## s
#Load Data
#cong<-read.dta('http://psfaculty.ucdavis.edu/bsjjones/career.dta')
cong<-read.dta('career.dta')
#Change Stata Names
#NOTE(review): columns 34 and 35 are referenced by position; confirm
#against career.dta that these are the event indicator and time counter
cong$d<-cong[,34]
cong$d.2<-abs(1-cong$d)
cong$t<-cong[,35]
###SPECIFYING THE BASELINE HAZARD RATE IN A LOGIT MODEL###
###NOTE: STANDARD ERRORS ARE NOT CLUSTER-CORRECTED, AS IN THE BOOK###
###ALTERNATIVE: FRAILTY TERM BY MEMBER###
#Exponential analogue: flat baseline hazard (no time terms in the logit)
exp.cong<-glm(d~rep, family=binomial(link="logit"), data=cong)
summary(exp.cong)
##
## Call:
## glm(formula = d ~ rep, family = binomial(link = "logit"), data = cong)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.417 -0.417 -0.381 -0.381 2.306
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.5863 0.0721 -35.88 <2e-16 ***
## rep 0.1894 0.1069 1.77 0.076 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2707.4 on 5053 degrees of freedom
## Residual deviance: 2704.3 on 5052 degrees of freedom
## (476 observations deleted due to missingness)
## AIC: 2708
##
## Number of Fisher Scoring iterations: 5
#Same flat-hazard model with a random intercept (frailty) by member
exp.cong.2<-lme4::glmer(d~rep+(1|memberid),
family=binomial(link="logit"),
data=cong)
summary(exp.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: d ~ rep + (1 | memberid)
## Data: cong
##
## AIC BIC logLik deviance df.resid
## 2660 2679 -1327 2654 5051
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.682 -0.226 -0.192 -0.178 3.991
##
## Random effects:
## Groups Name Variance Std.Dev.
## memberid (Intercept) 1.85 1.36
## Number of obs: 5054, groups: memberid, 944
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.870 0.117 -24.46 <2e-16 ***
## rep 0.164 0.154 1.07 0.29
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## rep -0.581
#Linear function of time as the baseline hazard
linear.cong<-glm(d~rep+t,
family=binomial(link="logit"), data=cong)
summary(linear.cong)
##
## Call:
## glm(formula = d ~ rep + t, family = binomial(link = "logit"),
## data = cong)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.464 -0.430 -0.400 -0.347 2.645
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.2574 0.1036 -21.79 < 2e-16 ***
## rep 0.1585 0.1073 1.48 0.14
## t -0.0756 0.0185 -4.09 0.000043 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2707.4 on 5053 degrees of freedom
## Residual deviance: 2686.0 on 5051 degrees of freedom
## (476 observations deleted due to missingness)
## AIC: 2692
##
## Number of Fisher Scoring iterations: 5
#Linear-time model with a random intercept by member
linear.cong.2<-glmer(d~rep+t+(1|memberid),
family=binomial(link="logit"),
data=cong)
summary(linear.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: d ~ rep + t + (1 | memberid)
## Data: cong
##
## AIC BIC logLik deviance df.resid
## 2658 2684 -1325 2650 5050
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.763 -0.199 -0.180 -0.159 3.767
##
## Random effects:
## Groups Name Variance Std.Dev.
## memberid (Intercept) 2.79 1.67
## Number of obs: 5054, groups: memberid, 944
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.2523 0.2675 -12.16 <2e-16 ***
## rep 0.1835 0.1732 1.06 0.289
## t 0.0638 0.0366 1.74 0.082 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) rep
## rep -0.344
## t -0.868 0.067
#Quadratic function of time as the baseline hazard
quad.cong<-glm(d~rep+duration+I(duration^2),
family=binomial(link="logit"),
data=cong)
summary(quad.cong)
##
## Call:
## glm(formula = d ~ rep + duration + I(duration^2), family = binomial(link = "logit"),
## data = cong)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.616 -0.421 -0.373 -0.337 2.472
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.98500 0.12964 -15.31 < 2e-16 ***
## rep 0.16672 0.10754 1.55 0.12108
## duration -0.22354 0.04771 -4.68 0.0000028 ***
## I(duration^2) 0.01222 0.00349 3.50 0.00046 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2707.4 on 5053 degrees of freedom
## Residual deviance: 2675.5 on 5050 degrees of freedom
## (476 observations deleted due to missingness)
## AIC: 2684
##
## Number of Fisher Scoring iterations: 5
#Quadratic-time model with a random intercept by member
#(this fit produced a convergence warning; see below)
quad.cong.2<-glmer(d~rep+duration+I(duration^2)+(1|memberid),
family=binomial(link="logit"),
data=cong)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00319156
## (tol = 0.001, component 1)
summary(quad.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: d ~ rep + duration + I(duration^2) + (1 | memberid)
## Data: cong
##
## AIC BIC logLik deviance df.resid
## 2657 2690 -1324 2647 5049
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.738 -0.216 -0.185 -0.168 3.857
##
## Random effects:
## Groups Name Variance Std.Dev.
## memberid (Intercept) 2.31 1.52
## Number of obs: 5054, groups: memberid, 944
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.94441 0.28691 -10.26 <2e-16 ***
## rep 0.18354 0.16395 1.12 0.263
## duration -0.04940 0.07185 -0.69 0.492
## I(duration^2) 0.00785 0.00453 1.73 0.083 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) rep duratn
## rep -0.296
## duration -0.823 0.025
## I(duratn^2) 0.568 0.006 -0.891
## convergence code: 0
## Model failed to converge with max|grad| = 0.00319156 (tol = 0.001, component 1)
#Log of duration as the baseline-hazard term
log.cong<-glm(d~rep+log(duration),
family=binomial(link="logit"),
data=cong)
summary(log.cong)
##
## Call:
## glm(formula = d ~ rep + log(duration), family = binomial(link = "logit"),
## data = cong)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.508 -0.415 -0.377 -0.338 2.552
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1365 0.1014 -21.06 < 2e-16 ***
## rep 0.1560 0.1074 1.45 0.15
## log(duration) -0.3896 0.0679 -5.74 0.0000000096 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2707.4 on 5053 degrees of freedom
## Residual deviance: 2671.1 on 5051 degrees of freedom
## (476 observations deleted due to missingness)
## AIC: 2677
##
## Number of Fisher Scoring iterations: 5
#Log-duration model with a random intercept by member
log.cong.2<-glmer(d~rep+log(duration)+(1|memberid),
family=binomial(link="logit"),
data=cong)
summary(log.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: d ~ rep + log(duration) + (1 | memberid)
## Data: cong
##
## AIC BIC logLik deviance df.resid
## 2662 2688 -1327 2654 5050
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.674 -0.229 -0.195 -0.179 4.062
##
## Random effects:
## Groups Name Variance Std.Dev.
## memberid (Intercept) 1.75 1.32
## Number of obs: 5054, groups: memberid, 944
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.8219 0.2228 -12.67 <2e-16 ***
## rep 0.1623 0.1519 1.07 0.29
## log(duration) -0.0321 0.1221 -0.26 0.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) rep
## rep -0.339
## log(duratn) -0.853 0.044
#NOTE(review): the following two paragraphs were pasted from the mgcv::gam
#help page without comment markers, which is a syntax error in an R script;
#they are commented out here so the file parses.
#Fits a generalized additive model (GAM) to data, the term 'GAM' being taken to include any quadratically penalized GLM and a variety of other models estimated by a quadratically penalised likelihood type approach (see family.mgcv). The degree of smoothness of model terms is estimated as part of fitting. gam can also fit any GLM subject to multiple quadratic penalties (including estimation of degree of penalization). Confidence/credible intervals are readily available for any quantity predicted using a fitted model.
#Smooth terms are represented using penalized regression splines (or similar smoothers) with smoothing parameters selected by GCV/UBRE/AIC/REML or by regression splines with fixed degrees of freedom (mixtures of the two are permitted). Multi-dimensional smooths are available using penalized thin plate regression splines (isotropic) or tensor product splines (when an isotropic smooth is inappropriate), and users can add smooths. Linear functionals of smooths can also be included in models. For an overview of the smooths available see smooth.terms. For more on specifying models see gam.models, random.effects and linear.functional.terms. For more on model selection see gam.selection. Do read gam.check and choose.k.
#Smoothed spline baseline hazard: s(duration) lets the data pick the shape
#Generalized Additive Models With Integrated Smoothness Estimation
spline.cong<-mgcv::gam(d~rep+s(duration),
family=binomial(link="logit"),
data=cong)
summary(spline.cong)
##
## Family: binomial
## Link function: logit
##
## Formula:
## d ~ rep + s(duration)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.6240 0.0736 -35.66 <2e-16 ***
## rep 0.1674 0.1076 1.56 0.12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(duration) 4.66 5.58 44.4 0.000000062 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.00938 Deviance explained = 1.8%
## UBRE = -0.4713 Scale est. = 1 n = 5054
plot(spline.cong)
#add a random-effect (frailty) smooth by member via bs='re'
spline.cong.2<-gam(d~rep+s(duration)+s(memberid,bs='re'),
family=binomial(link="logit"),
data=cong)
summary(spline.cong.2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## d ~ rep + s(duration) + s(memberid, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.436 0.130 -18.75 <2e-16 ***
## rep 0.153 0.108 1.42 0.16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(duration) 4.649 5.57 44.90 0.000000049 ***
## s(memberid) 0.747 1.00 3.01 0.045 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0103 Deviance explained = 1.93%
## UBRE = -0.47174 Scale est. = 1 n = 5054
plot(spline.cong.2)
###Cox/Conditional Logit Model###
#Estimates a logistic regression model by maximising the conditional
#likelihood, stratifying on the discrete time index t
conditional.cong<-survival::clogit(d~rep+strata(t),
method="approximate",
data=cong)
summary(conditional.cong)
## Call:
## coxph(formula = Surv(rep(1, 5530L), d) ~ rep + strata(t), data = cong,
## method = "breslow")
##
## n= 5054, number of events= 382
## (476 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## rep 0.153 1.165 0.103 1.48 0.14
##
## exp(coef) exp(-coef) lower .95 upper .95
## rep 1.17 0.858 0.952 1.43
##
## Concordance= 0.517 (se = 0.015 )
## Rsquare= 0 (max possible= 0.615 )
## Likelihood ratio test= 2.19 on 1 df, p=0.1
## Wald test = 2.2 on 1 df, p=0.1
## Score (logrank) test = 2.21 on 1 df, p=0.1
###########################################################
#clean up
rm(list=ls())
#packages
library(lme4)
library(nlme)
library(lattice)
library(geepack)
library(reshape)
###REPEATED MEASURES###
#crossover-trial data: treatments A, B, and placebo P over two periods
headache<-read.table("headache.txt", header=TRUE, sep="")
head(headache)
## id center treatment.a sequence period treatment.b response
## 1 1 1 B 1 0 B 0.00
## 2 1 1 A 1 1 A 11.50
## 3 2 1 A 2 0 A 11.75
## 4 2 1 B 2 1 B 13.75
## 5 3 1 A 2 0 A 0.50
## 6 3 1 B 2 1 B 8.25
#Design check: cross-tabulate treatment by period within each of the
#six sequences to verify the crossover structure
table(headache$treatment.a[headache$sequence==1],
headache$period[headache$sequence==1])
##
## 0 1
## A 0 127
## B 127 0
## P 0 0
table(headache$treatment.a[headache$sequence==2],
headache$period[headache$sequence==2])
##
## 0 1
## A 126 0
## B 0 126
## P 0 0
table(headache$treatment.a[headache$sequence==3],
headache$period[headache$sequence==3])
##
## 0 1
## A 0 0
## B 0 42
## P 42 0
table(headache$treatment.a[headache$sequence==4],
headache$period[headache$sequence==4])
##
## 0 1
## A 0 0
## B 42 0
## P 0 42
table(headache$treatment.a[headache$sequence==5],
headache$period[headache$sequence==5])
##
## 0 1
## A 0 43
## B 0 0
## P 43 0
table(headache$treatment.a[headache$sequence==6],
headache$period[headache$sequence==6])
##
## 0 1
## A 43 0
## B 0 0
## P 0 43
#relevel treatment so placebo (P) is the reference category
headache$treatment.a<-relevel(headache$treatment.a, "P")
#carryover indicators: flag second-period observations that followed an
#active treatment (sequences 2 and 6 gave A first; sequences 1 and 4 gave
#B first, per the tables above)
headache$carry.a<-as.numeric(headache$sequence%in%c(2,6) & headache$period==1)
headache$carry.b<-as.numeric(headache$sequence%in%c(1,4) & headache$period==1)
#MODEL WITH CARRYOVER EFFECTS: random intercept per subject; fixed effects
#for treatment, period, and the two carryover indicators
headache.mod <- lmer(response~as.factor(treatment.a)+
period+carry.a+carry.b+(1|id),
data=headache)
summary(headache.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: response ~ as.factor(treatment.a) + period + carry.a + carry.b +
## (1 | id)
## Data: headache
##
## REML criterion at convergence: 4536
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8177 -0.5218 0.0942 0.6070 2.2486
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 5.05 2.25
## Residual 8.43 2.90
## Number of obs: 846, groups: id, 423
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 7.815 0.304 25.70
## as.factor(treatment.a)A 2.506 0.349 7.19
## as.factor(treatment.a)B 1.952 0.348 5.61
## period 0.301 0.415 0.73
## carry.a -0.878 0.526 -1.67
## carry.b 0.186 0.524 0.36
##
## Correlation of Fixed Effects:
## (Intr) a.(.)A a.(.)B period carry.
## as.fctr(.)A -0.716
## as.fctr(.)B -0.716 0.565
## period 0.143 -0.330 -0.326
## carry.a -0.299 0.490 0.164 -0.763
## carry.b -0.296 0.163 0.485 -0.761 0.509
#Differences in treatment effects (A vs. B): Wald z-test.
#IMPROVEMENT: compute from the fitted model's coefficient vector and
#variance-covariance matrix instead of hard-coding rounded values copied
#from the printed summary (fragile if the data or model change).
fe<-fixef(headache.mod)
V<-as.matrix(vcov(headache.mod))
a.trt<-"as.factor(treatment.a)A"
b.trt<-"as.factor(treatment.a)B"
num<-fe[[a.trt]]-fe[[b.trt]]; num
## [1] 0.554
#Var(A-B) = Var(A) - 2Cov(A,B) + Var(B)
dem<-sqrt(V[a.trt,a.trt]-2*V[a.trt,b.trt]+V[b.trt,b.trt]); dem
## [1] 0.325
z<-num/dem; z
## [1] 1.7
p<-2*(1-pnorm(abs(z))); p
## [1] 0.0883
#Differences in carryover effects (carry.a vs. carry.b), same construction
num.2<-fe[["carry.a"]]-fe[["carry.b"]]; num.2
## [1] -1.06
dem.2<-sqrt(V["carry.a","carry.a"]-2*V["carry.a","carry.b"]+
V["carry.b","carry.b"]); dem.2
## [1] 0.52
z.2<-num.2/dem.2; z.2
## [1] -2.05
p.2<-2*(1-pnorm(abs(z.2))); p.2
## [1] 0.0407
#MODEL WITH NO CARRYOVER
#Simpler random-intercept model dropping the carryover indicators,
#for comparison with headache.mod above.
no.carryover <- lmer(response~as.factor(treatment.a)+
period+(1|id),
data=headache)
summary(no.carryover)
## Linear mixed model fit by REML ['lmerMod']
## Formula: response ~ as.factor(treatment.a) + period + (1 | id)
## Data: headache
##
## REML criterion at convergence: 4541
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8262 -0.5288 0.0989 0.6108 2.1413
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 5.10 2.26
## Residual 8.43 2.90
## Number of obs: 846, groups: id, 423
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 7.7341 0.2861 27.04
## as.factor(treatment.a)A 2.8410 0.3021 9.40
## as.factor(treatment.a)B 1.8202 0.3027 6.01
## period 0.0242 0.1996 0.12
##
## Correlation of Fixed Effects:
## (Intr) a.(.)A a.(.)B
## as.fctr(.)A -0.721
## as.fctr(.)B -0.721 0.707
## period -0.349 -0.001 0.001
#Differences in treatment effects: Wald test of A vs. B from the
#no-carryover model, using its reported estimates, SEs, and correlation.
num.3 <- 2.84104 - 1.82015
num.3
## [1] 1.02
#SE of the difference: sqrt(se.A^2 + se.B^2 - 2*corr*se.A*se.B)
dem.3 <- sqrt(0.30209^2 + 0.30269^2 - 2 * 0.707 * 0.30209 * 0.30269)
dem.3
## [1] 0.231
z.3 <- num.3 / dem.3
z.3
## [1] 4.41
#two-sided p-value from the standard normal
p.3 <- 2 * (1 - pnorm(abs(z.3)))
p.3
## [1] 0.0000103
###MULTIPLE SOURCES###
#GEE logit for binary reports from two informants (parent and teacher)
#per child, clustered by childID with exchangeable working correlation.
#clean up
rm(list=ls())
#load data
ccs<-read.table("ccs.txt", header=TRUE, sep="",na.strings = ".")
#reshape long and then SORT!!!
#NOTE(review): melt.data.frame() appears to come from the old 'reshape'
#package (attached earlier in the script, per later masking messages) --
#confirm; modern code would use reshape2::melt() or tidyr::pivot_longer().
ccs.long<-melt.data.frame(data=ccs,
measure.vars=c("parentRep","teacherRep"),
id=c("childID", "physical", "singlePar"))
#geeglm requires rows sorted so each cluster's rows are contiguous
ccs.long<-with(ccs.long,ccs.long[order(childID,variable),])
#relevel informant variable
ccs.long$variable<-relevel(ccs.long$variable, "teacherRep")
#GEE model
ccs.mod<-geepack::geeglm(value~as.factor(variable)*physical+singlePar,
id=childID,
family=binomial(link="logit"),
data=ccs.long,
scale.fix=TRUE,
corstr="exchangeable")
summary(ccs.mod)
##
## Call:
## geepack::geeglm(formula = value ~ as.factor(variable) * physical +
## singlePar, family = binomial(link = "logit"), data = ccs.long,
## id = childID, corstr = "exchangeable", scale.fix = TRUE)
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.6801 0.0999 282.74 < 2e-16
## as.factor(variable)parentRep -0.4738 0.1180 16.13 0.000059206
## physical 0.1423 0.1350 1.11 0.2917
## singlePar 0.6137 0.1076 32.54 0.000000012
## as.factor(variable)parentRep:physical 0.4573 0.1571 8.47 0.0036
##
## (Intercept) ***
## as.factor(variable)parentRep ***
## physical
## singlePar ***
## as.factor(variable)parentRep:physical **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale is fixed.
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.268 0.026
## Number of clusters: 2501 Maximum cluster size: 2
###########################################################
###REPEATED EVENTS COX MODEL###
##MILITARIZED INTERVENTIONS##
#Conditional (fixed-effects) logit stratified by repeated-spell counter
#RS; clogit() fits this as an exact-method Cox model (see Call below).
#clean up
rm(list=ls())
#load packages
library(foreign)
library(survival)
#load data
#http://psfaculty.ucdavis.edu/bsjjones/icpsr_omi_spellsplit.dta
interventions<-read.dta("icpsr_omi_spellsplit.dta")
#Model: compare to Box-Steffensmeier & Jones Table 10.2.
#Note: frailty terms would be ideal.
conditional.interventions<-clogit(event~pbal+ctg+idem+tdem+strata(RS),
data=interventions)
summary(conditional.interventions)
## Call:
## coxph(formula = Surv(rep(1, 12780L), event) ~ pbal + ctg + idem +
## tdem + strata(RS), data = interventions, method = "exact")
##
## n= 9182, number of events= 500
## (3598 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pbal -0.52876 0.58933 0.15871 -3.33 0.00086 ***
## ctg -0.29427 0.74508 0.10779 -2.73 0.00633 **
## idem 0.01069 1.01074 0.00646 1.65 0.09794 .
## tdem 0.01641 1.01655 0.00720 2.28 0.02257 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pbal 0.589 1.697 0.432 0.804
## ctg 0.745 1.342 0.603 0.920
## idem 1.011 0.989 0.998 1.024
## tdem 1.017 0.984 1.002 1.031
##
## Concordance= 0.566 (se = 0.016 )
## Rsquare= 0.003 (max possible= 0.296 )
## Likelihood ratio test= 24.1 on 4 df, p=0.00008
## Wald test = 24.6 on 4 df, p=0.00006
## Score (logrank) test = 24.7 on 4 df, p=0.00006
###COMPETING RISKS COX MODEL###
##MELANOMA PATIENT SURVIVAL TIME##
#Cause-specific Cox models via riskRegression::CSC; fits one Cox model
#per competing cause (here: two causes plus right-censoring).
#clean up
rm(list=ls())
#load package and data
library(riskRegression)
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following object is masked from 'package:plm':
##
## between
## The following object is masked from 'package:reshape':
##
## melt
## Loading required package: ggplot2
## Loading required package: prodlim
## riskRegression version 2018.10.03
data(Melanoma)
#backup file: Melanoma.csv
#Plan A: Different predictors for each cause.
#First formula (sex) models cause 1; second (invasion+epicel+age)
#models cause 2.
fit1 <- riskRegression::CSC(list(Hist(time,status)~
sex,Hist(time,status)~
invasion+epicel+age),
data=Melanoma)
print(fit1)
## riskRegression::CSC(formula = list(Hist(time, status) ~ sex,
## Hist(time, status) ~ invasion + epicel + age), data = Melanoma)
##
## Right-censored response of a competing.risks model
##
## No.Observations: 205
##
## Pattern:
##
## Cause event right.censored
## 1 57 0
## 2 14 0
## unknown 0 134
##
##
## ----------> Cause: 1
##
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ sex,
## x = TRUE, y = TRUE)
##
## n= 205, number of events= 57
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sexMale 0.662 1.939 0.265 2.5 0.013 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexMale 1.94 0.516 1.15 3.26
##
## Concordance= 0.59 (se = 0.034 )
## Rsquare= 0.03 (max possible= 0.937 )
## Likelihood ratio test= 6.15 on 1 df, p=0.01
## Wald test = 6.24 on 1 df, p=0.01
## Score (logrank) test = 6.47 on 1 df, p=0.01
##
##
##
## ----------> Cause: 2
##
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ invasion +
## epicel + age, x = TRUE, y = TRUE)
##
## n= 205, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## invasionlevel.1 -0.9130 0.4013 0.6411 -1.42 0.15438
## invasionlevel.2 -1.2766 0.2790 1.1170 -1.14 0.25309
## epicelpresent 0.3224 1.3804 0.5701 0.57 0.57173
## age 0.0932 1.0977 0.0261 3.58 0.00035 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## invasionlevel.1 0.401 2.492 0.1142 1.41
## invasionlevel.2 0.279 3.584 0.0312 2.49
## epicelpresent 1.380 0.724 0.4516 4.22
## age 1.098 0.911 1.0431 1.16
##
## Concordance= 0.83 (se = 0.046 )
## Rsquare= 0.088 (max possible= 0.481 )
## Likelihood ratio test= 19 on 4 df, p=0.0008
## Wald test = 14 on 4 df, p=0.007
## Score (logrank) test = 15 on 4 df, p=0.005
#Plan B: Same predictors for each cause.
#A single formula applies the full predictor set to both
#cause-specific hazards.
fit2 <- CSC(Hist(time,status)~
sex+invasion+epicel+age,
data=Melanoma)
print(fit2)
## CSC(formula = Hist(time, status) ~ sex + invasion + epicel +
## age, data = Melanoma)
##
## Right-censored response of a competing.risks model
##
## No.Observations: 205
##
## Pattern:
##
## Cause event right.censored
## 1 57 0
## 2 14 0
## unknown 0 134
##
##
## ----------> Cause: 1
##
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ sex +
## invasion + epicel + age, x = TRUE, y = TRUE)
##
## n= 205, number of events= 57
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sexMale 0.81455 2.25817 0.27080 3.01 0.00263 **
## invasionlevel.1 0.95490 2.59842 0.32593 2.93 0.00339 **
## invasionlevel.2 1.37028 3.93647 0.38333 3.57 0.00035 ***
## epicelpresent -0.96361 0.38151 0.30587 -3.15 0.00163 **
## age 0.01624 1.01637 0.00844 1.92 0.05439 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexMale 2.258 0.443 1.328 3.839
## invasionlevel.1 2.598 0.385 1.372 4.922
## invasionlevel.2 3.936 0.254 1.857 8.344
## epicelpresent 0.382 2.621 0.209 0.695
## age 1.016 0.984 1.000 1.033
##
## Concordance= 0.731 (se = 0.036 )
## Rsquare= 0.168 (max possible= 0.937 )
## Likelihood ratio test= 37.6 on 5 df, p=0.0000005
## Wald test = 33.7 on 5 df, p=0.000003
## Score (logrank) test = 35.9 on 5 df, p=0.000001
##
##
##
## ----------> Cause: 2
##
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ sex +
## invasion + epicel + age, x = TRUE, y = TRUE)
##
## n= 205, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sexMale 0.2498 1.2838 0.5560 0.45 0.6532
## invasionlevel.1 -0.8908 0.4103 0.6429 -1.39 0.1659
## invasionlevel.2 -1.2675 0.2815 1.1246 -1.13 0.2597
## epicelpresent 0.2762 1.3181 0.5806 0.48 0.6343
## age 0.0914 1.0957 0.0262 3.48 0.0005 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexMale 1.284 0.779 0.4317 3.82
## invasionlevel.1 0.410 2.437 0.1164 1.45
## invasionlevel.2 0.282 3.552 0.0311 2.55
## epicelpresent 1.318 0.759 0.4224 4.11
## age 1.096 0.913 1.0408 1.15
##
## Concordance= 0.837 (se = 0.047 )
## Rsquare= 0.089 (max possible= 0.481 )
## Likelihood ratio test= 19.2 on 5 df, p=0.002
## Wald test = 14.4 on 5 df, p=0.01
## Score (logrank) test = 15.8 on 5 df, p=0.008
###MULTINOMIAL LOGIT COMPETING RISKS MODEL###
##MEANS OF CONGRESSIONAL EXIT##
#Multinomial logit treating the four exit routes as competing risks,
#with log(duration) entering as a discrete-time baseline-hazard term.
#clean up
rm(list=ls())
#load package
library(nnet)
##
## Attaching package: 'nnet'
## The following object is masked from 'package:mgcv':
##
## multinom
library(foreign)
#Load Data
cong<-read.dta('career.dta')
#model: compare to Box-Steffensmeier & Jones Table 10.6
#Note: frailty terms would be ideal. May need "MCMCglmm"
four.exits<-multinom(event~rep+redist+scandal+opengov+
opensen+leader+age+priorm+log(duration),
data=cong)
## # weights: 55 (40 variable)
## initial value 8737.638427
## iter 10 value 4831.129941
## iter 20 value 3948.051135
## iter 30 value 3296.132728
## iter 40 value 3077.559021
## iter 50 value 3065.565411
## final value 3064.504945
## converged
summary(four.exits)
#NOTE(review): the huge |coef| with near-zero SE for leader (outcome 2)
#and scandal (outcome 4) below suggests complete separation (no events
#in those cells); interpret those coefficients with care.
## Call:
## multinom(formula = event ~ rep + redist + scandal + opengov +
## opensen + leader + age + priorm + log(duration), data = cong)
##
## Coefficients:
## (Intercept) rep redist scandal opengov opensen leader age
## 1 -3.05 -0.1259 1.61 2.76 0.0820 -0.2812 -0.591 0.0387
## 2 -6.23 -0.0064 1.42 3.29 0.2325 -0.4574 -25.165 0.0434
## 3 -8.02 0.1864 1.36 1.23 0.0368 0.0943 -0.381 0.0820
## 4 -1.18 0.2823 1.52 -13.27 0.5099 1.0293 -1.557 -0.0591
## priorm log(duration)
## 1 -0.0571540 -0.314
## 2 -0.0048450 -0.167
## 3 -0.0100659 0.515
## 4 -0.0000927 0.491
##
## Std. Errors:
## (Intercept) rep redist scandal opengov opensen leader
## 1 0.357 0.125 0.291 0.3810760708 0.157 0.206 0.5237159581173
## 2 0.752 0.259 0.553 0.4326457972 0.307 0.434 0.0000000000035
## 3 0.461 0.143 0.327 0.4171685839 0.173 0.203 0.3068835106453
## 4 0.425 0.144 0.323 0.0000000935 0.156 0.157 1.0166972257154
## age priorm log(duration)
## 1 0.00733 0.00500 0.0925
## 2 0.01516 0.00494 0.1912
## 3 0.00861 0.00286 0.1178
## 4 0.00985 0.00269 0.1220
##
## Residual Deviance: 6129
## AIC: 6209
###########################################################
###CHAPTER 8: USING PACKAGES TO APPLY ADVANCED MODELS###
##REQUIRED DATA FILES: BPchap7.dta, SinghJTP.dta, LL.csv, and UN.csv
##SECTION 8.1: MULTILEVEL MODELS WITH lme4##
#clean up
rm(list=ls())
#load package
library(foreign)
#load and clean data
#evolution<-read.dta("http://j.mp/BPchap7")
evolution<-read.dta("BPchap7.dta")
#female==9 is a missing-data code; recode to NA and drop those rows
evolution$female[evolution$female==9]<-NA
evolution<-subset(evolution,!is.na(female))
#load package
#install.packages("lme4")
library(lme4)
#estimate linear model of hours spent teaching evolution
#with random effects by state
#(1|st_fip) gives each state a random intercept; teachers are the
#level-1 units nested within states
hours.ml<-lmer(hrs_allev~phase1+senior_c+ph_senior+
notest_p+ph_notest_p+female+biocred3+
degr3+evol_course+certified+idsci_trans+
confident+(1|st_fip),
data=evolution)
summary(hours.ml)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## hrs_allev ~ phase1 + senior_c + ph_senior + notest_p + ph_notest_p +
## female + biocred3 + degr3 + evol_course + certified + idsci_trans +
## confident + (1 | st_fip)
## Data: evolution
##
## REML criterion at convergence: 5940
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.348 -0.714 -0.175 0.557 3.885
##
## Random effects:
## Groups Name Variance Std.Dev.
## st_fip (Intercept) 3.09 1.76
## Residual 67.87 8.24
## Number of obs: 841, groups: st_fip, 49
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 10.568 1.214 8.71
## phase1 0.758 0.443 1.71
## senior_c -0.529 0.310 -1.71
## ph_senior -0.527 0.270 -1.95
## notest_p 0.113 0.749 0.15
## ph_notest_p -0.527 0.660 -0.80
## female -0.970 0.603 -1.61
## biocred3 0.516 0.504 1.02
## degr3 -0.443 0.389 -1.14
## evol_course 2.389 0.627 3.81
## certified -0.533 0.719 -0.74
## idsci_trans 1.728 1.116 1.55
## confident 2.674 0.447 5.98
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
#SECTION 8.1.2: MULTILEVEL LOGISTIC REGRESSION#
#load packages
library(lme4)
library(foreign)
#load data
#voting<-read.dta("http://j.mp/SINGHjtp")
voting<-read.dta("SinghJTP.dta")
#estimate logistic regression model of voting for incumbents
#with random effects by country-year
#(1|cntryyear) adds a random intercept for each election (country-year)
inc.linear.ml<-glmer(votedinc~distanceinc+(1|cntryyear),
family=binomial(link="logit"),
data=voting)
summary(inc.linear.ml)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: votedinc ~ distanceinc + (1 | cntryyear)
## Data: voting
##
## AIC BIC logLik deviance df.resid
## 41999 42025 -20996 41993 38208
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.541 -0.680 -0.428 0.955 14.835
##
## Random effects:
## Groups Name Variance Std.Dev.
## cntryyear (Intercept) 0.207 0.455
## Number of obs: 38211, groups: cntryyear, 30
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.16175 0.08545 1.89 0.058 .
## distanceinc -0.50122 0.00888 -56.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## distanceinc -0.184
#estimate logistic regression model of voting for incumbents
#with random intercepts and random coefficients on ideological
#distance by country-year
#(distanceinc|cntryyear) lets both the intercept and the slope on
#distanceinc vary across elections, with an estimated correlation
inc.linear.ml.2<-glmer(votedinc~distanceinc+(distanceinc|cntryyear),
family=binomial(link="logit"),
data=voting)
summary(inc.linear.ml.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: votedinc ~ distanceinc + (distanceinc | cntryyear)
## Data: voting
##
## AIC BIC logLik deviance df.resid
## 41074 41117 -20532 41064 38206
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.067 -0.701 -0.415 0.920 28.018
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## cntryyear (Intercept) 0.6167 0.785
## distanceinc 0.0981 0.313 -0.81
## Number of obs: 38211, groups: cntryyear, 30
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2620 0.1450 1.81 0.071 .
## distanceinc -0.5305 0.0581 -9.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## distanceinc -0.807
###THREE-LEVEL EXAMPLE FROM FITZMAURICE, LAIRD AND WARE TABLE 22.3###
#clean up
rm(list=ls())
#required libraries
library(lme4)
#load data
smoking<-read.table("tvsfp.txt", header=TRUE)
#Three-Level Model (students within classes within schools)
#Crossed random-intercept terms (1|schoolID) and (1|classID) capture
#the school- and classroom-level variance components.
smoking.mod<-lmer(post~pre+schT+tvT+schT*tvT+
(1|schoolID)+(1|classID),
data=smoking)
summary(smoking.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: post ~ pre + schT + tvT + schT * tvT + (1 | schoolID) + (1 |
## classID)
## Data: smoking
##
## REML criterion at convergence: 5373
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4987 -0.6976 -0.0172 0.6824 3.1460
##
## Random effects:
## Groups Name Variance Std.Dev.
## classID (Intercept) 0.0647 0.254
## schoolID (Intercept) 0.0386 0.197
## Residual 1.6023 1.266
## Number of obs: 1600, groups: classID, 135; schoolID, 28
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.7020 0.1254 13.57
## pre 0.3054 0.0259 11.79
## schT 0.6413 0.1609 3.98
## tvT 0.1821 0.1572 1.16
## schT:tvT -0.3309 0.2246 -1.47
##
## Correlation of Fixed Effects:
## (Intr) pre schT tvT
## pre -0.442
## schT -0.634 0.015
## tvT -0.645 0.008 0.501
## schT:tvT 0.448 0.005 -0.716 -0.700
###Another neat dataset from Gelman & Hill. CBS 1988 exit poll data.###
#Logit Model of Bush Support in 1988 as a function of race and sex,
#with a random effect for state
#elec.88<-read.table("http://www.stat.columbia.edu/~gelman/arm/examples/election88/polls.subset.dat", header=TRUE)
###########################################################
#POWER ANALYSIS WITH THE pwr PACKAGE#
#load package
library(pwr)
###Difference of means test?
###What sample for an effect size of 1?
###Number of observations per group.###
#Note: Effect size as defined by Cohen:
#absolute difference divided by standard deviation
pwr.t.test(d=1, sig.level=.05, power=.8,
type="two.sample", alternative="two.sided")
##
## Two-sample t test power calculation
##
## n = 16.7
## d = 1
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group
#Hand calculation if you were content with a normal distribution.
#Number of observations per group.
(qnorm(.975)+qnorm(.8))^2/(.5*(1-.5))/2
## [1] 15.7
#Hand calculation if you wanted to hunt-and-peck the t-distribution
#for degrees of freedom. Number of observations per group.
(qt(.975,df=32)+qt(.8,df=32))^2/(.5*(1-.5))/2
## [1] 16.7
###Two samples of different sizes. Pick one group,
###find the size for the other group.###
#Fix n1 and solve for n2 (the NULL argument is the one solved for).
pwr.t2n.test(d=0.6,n1=90,n2=NULL,alternative="greater",power=.8)
##
## t test power calculation
##
## n1 = 90
## n2 = 21.6
## d = 0.6
## sig.level = 0.05
## power = 0.8
## alternative = greater
#Same design, now fixing n2 and solving for n1.
pwr.t2n.test(d=0.6,n1=NULL,n2=60,alternative="greater",power=.8)
##
## t test power calculation
##
## n1 = 24.6
## n2 = 60
## d = 0.6
## sig.level = 0.05
## power = 0.8
## alternative = greater
###ANOVA: Can we explain 2% of variance with 1 predictor?
###Number of observations overall.###
#f2 is Cohen's effect size R^2/(1-R^2); v is the returned error df,
#so the total n is v + u + 1.
pwr.f2.test(u=1, f2=.02/.98, sig.level=.05, power=.8)
##
## Multiple regression power calculation
##
## u = 1
## v = 385
## f2 = 0.0204
## sig.level = 0.05
## power = 0.8
#Can we detect joint significance of three predictors if
#they can explain an extra 10 percentage points of variance
#if the model explains 60% total?
#f2 for nested models: (R2.full - R2.reduced)/(1 - R2.full).
pwr.f2.test(u=3, f2=.10/.40, sig.level=.05, power=.8)
##
## Multiple regression power calculation
##
## u = 3
## v = 43.7
## f2 = 0.25
## sig.level = 0.05
## power = 0.8
###Panel Example: Section 20.3.4 in book. Number of observations overall.###
#Hard-coded application of Equation 20.2: total sample size needed to
#detect the minimum effect with 90% power, alpha = .05, even 50/50 split.
sigma.beta.2 <- 2.8 # variance of coefficient of interest
delta <- 1.2 # minimum treatment effect
#required total n (auto-printed)
(qnorm(.975) + qnorm(.9))^2 * sigma.beta.2 / (.5 * (1 - .5) * delta^2)
## [1] 81.7
#Equation 20.2 as a Function#
#Note that you may want to consider Equation 20.4 to get a sense of sigma.beta.2.
#Computes the total sample size required to detect a treatment effect.
# alpha: two-sided significance level, in (0, 1)
# power: desired statistical power, in (0, 1)
# delta: minimum treatment effect to detect (must be nonzero)
# sigma.beta.2: variance of the coefficient of interest (must be positive)
# split: proportion of observations assigned to treatment, in (0, 1)
#Returns the required total number of observations (not rounded).
eqn.20.2<-function(alpha,power,delta,sigma.beta.2,split){
#validate inputs up front so bad arguments fail loudly
#rather than returning NaN or a negative sample size
stopifnot(alpha > 0, alpha < 1,
          power > 0, power < 1,
          split > 0, split < 1,
          delta != 0,
          sigma.beta.2 > 0)
sample.size<-(((qnorm(1-(alpha/2))+qnorm(power))^2)*sigma.beta.2)/
(split*(1-split)*delta^2)
return(sample.size)
}
#replicate
#(positional call; should reproduce the hard-coded 81.7 result above)
eqn.20.2(.05,.9,1.2,2.8,.5)
## [1] 81.7
#content with 80% power
#(named-argument call; lower power requirement shrinks the needed n)
eqn.20.2(alpha=.05,power=.8,delta=1.2,sigma.beta.2=2.8,split=.5)
## [1] 61